/*
 * NativeAPI.actor.cpp
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "fdbclient/NativeAPI.actor.h"

#include <algorithm>
#include <iterator>
#include <regex>
#include <unordered_set>
#include <tuple>
#include <utility>
#include <vector>

#include "fdbclient/FDBTypes.h"
#include "fdbrpc/FailureMonitor.h"
#include "fdbrpc/MultiInterface.h"

#include "fdbclient/ActorLineageProfiler.h"
#include "fdbclient/AnnotateActor.h"
#include "fdbclient/Atomic.h"
#include "fdbclient/ClusterInterface.h"
#include "fdbclient/CoordinationInterface.h"
#include "fdbclient/DatabaseContext.h"
#include "fdbclient/GlobalConfig.actor.h"
#include "fdbclient/JsonBuilder.h"
#include "fdbclient/KeyRangeMap.h"
#include "fdbclient/Knobs.h"
#include "fdbclient/ManagementAPI.actor.h"
#include "fdbclient/CommitProxyInterface.h"
#include "fdbclient/MonitorLeader.h"
#include "fdbclient/MutationList.h"
#include "fdbclient/ReadYourWrites.h"
#include "fdbclient/SpecialKeySpace.actor.h"
#include "fdbclient/StorageServerInterface.h"
#include "fdbclient/SystemData.h"
#include "fdbclient/TransactionLineage.h"
#include "fdbclient/versions.h"
#include "fdbrpc/LoadBalance.h"
#include "fdbrpc/Net2FileSystem.h"
#include "fdbrpc/simulator.h"
#include "flow/Arena.h"
#include "flow/ActorCollection.h"
#include "flow/DeterministicRandom.h"
#include "flow/Error.h"
#include "flow/IRandom.h"
#include "flow/flow.h"
#include "flow/genericactors.actor.h"
#include "flow/Knobs.h"
#include "flow/Platform.h"
#include "flow/SystemMonitor.h"
#include "flow/TLSConfig.actor.h"
#include "flow/Tracing.h"
#include "flow/UnitTest.h"
#include "flow/serialize.h"

#ifdef WIN32
#define WIN32_LEAN_AND_MEAN
#include <Windows.h>
#undef min
#undef max
#else
#include <time.h>
#endif

#include "flow/actorcompiler.h" // This must be the last #include.

extern const char* getSourceVersion();

using std::max;
using std::min;
using std::pair;

namespace {

TransactionLineageCollector transactionLineageCollector;

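// A cache-aware wrapper around the generic loadBalance() from fdbrpc: when a reply indicates it was served by a
// storage cache, it triggers DatabaseContext::updateCache so the client refreshes its list of cache servers.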
template <class Interface, class Request>
Future<REPLY_TYPE(Request)> loadBalance(
    DatabaseContext* ctx,
    const Reference<LocationInfo> alternatives,
    RequestStream<Request> Interface::*channel,
    const Request& request = Request(),
    TaskPriority taskID = TaskPriority::DefaultPromiseEndpoint,
    bool atMostOnce = false, // if true, throws request_maybe_delivered() instead of retrying automatically
    QueueModel* model = nullptr) {
    if (alternatives->hasCaches) {
        return loadBalance(alternatives->locations(), channel, request, taskID, atMostOnce, model);
    }
    return fmap(
        [ctx](auto const& res) {
            if (res.cached) {
                ctx->updateCache.trigger();
            }
            return res;
        },
        loadBalance(alternatives->locations(), channel, request, taskID, atMostOnce, model));
}

} // namespace

NetworkOptions networkOptions;
TLSConfig tlsConfig(TLSEndpointType::CLIENT);

// The default values TRACE_DEFAULT_ROLL_SIZE and TRACE_DEFAULT_MAX_LOGS_SIZE are defined in Trace.h.
NetworkOptions::NetworkOptions()
  : localAddress(""), clusterFile(""), traceDirectory(Optional<std::string>()), traceRollSize(TRACE_DEFAULT_ROLL_SIZE),
    traceMaxLogsSize(TRACE_DEFAULT_MAX_LOGS_SIZE), traceLogGroup("default"), traceFormat("xml"),
    traceClockSource("now"), runLoopProfilingEnabled(false),
    supportedVersions(new ReferencedObject<Standalone<VectorRef<ClientVersionRef>>>()) {}

static const Key CLIENT_LATENCY_INFO_PREFIX = LiteralStringRef("client_latency/");
static const Key CLIENT_LATENCY_INFO_CTR_PREFIX = LiteralStringRef("client_latency_counter/");

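// Returns a reference-counted wrapper for the storage server interface ssi, reusing the entry already in
// cx->server_interf when its endpoint tokens still match, and creating or replacing the entry otherwise.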
Reference<StorageServerInfo> StorageServerInfo::getInterface(DatabaseContext* cx,
                                                             StorageServerInterface const& ssi,
                                                             LocalityData const& locality) {
    auto it = cx->server_interf.find(ssi.id());
    if (it != cx->server_interf.end()) {
        if (it->second->interf.getValue.getEndpoint().token != ssi.getValue.getEndpoint().token) {
            if (it->second->interf.locality == ssi.locality) {
                // FIXME: load balance holds pointers to individual members of the interface, and this assignment will
                // swap out the object they are pointing to. This is technically correct, but is very unnatural. We may
                // want to refactor load balance to take an AsyncVar<Reference<Interface>> so that it is notified when
                // the interface changes.
                it->second->interf = ssi;
            } else {
                it->second->notifyContextDestroyed();
                Reference<StorageServerInfo> loc(new StorageServerInfo(cx, ssi, locality));
                cx->server_interf[ssi.id()] = loc.getPtr();
                return loc;
            }
        }

        return Reference<StorageServerInfo>::addRef(it->second);
    }

    Reference<StorageServerInfo> loc(new StorageServerInfo(cx, ssi, locality));
    cx->server_interf[ssi.id()] = loc.getPtr();
    return loc;
}

void StorageServerInfo::notifyContextDestroyed() {
    cx = nullptr;
}

StorageServerInfo::~StorageServerInfo() {
    if (cx) {
        auto it = cx->server_interf.find(interf.id());
        if (it != cx->server_interf.end())
            cx->server_interf.erase(it);
        cx = nullptr;
    }
}

std::string printable(const VectorRef<KeyValueRef>& val) {
    std::string s;
    for (int i = 0; i < val.size(); i++)
        s = s + printable(val[i].key) + format(":%d ", val[i].value.size());
    return s;
}

std::string printable(const KeyValueRef& val) {
    return printable(val.key) + format(":%d ", val.value.size());
}

std::string printable(const VectorRef<StringRef>& val) {
    std::string s;
    for (int i = 0; i < val.size(); i++)
        s = s + printable(val[i]) + " ";
    return s;
}

std::string printable(const StringRef& val) {
    return val.printable();
}

std::string printable(const std::string& str) {
    return StringRef(str).printable();
}

std::string printable(const KeyRangeRef& range) {
    return printable(range.begin) + " - " + printable(range.end);
}

std::string printable(const VectorRef<KeyRangeRef>& val) {
    std::string s;
    for (int i = 0; i < val.size(); i++)
        s = s + printable(val[i]) + " ";
    return s;
}
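
// unprintable() below is the inverse of printable(): it decodes the "\\" and "\xNN" escapes that printable()
// emits back into raw bytes. For example, unprintable("\\x00") yields a one-byte string containing '\0'.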
int unhex(char c) {
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    UNREACHABLE();
}

std::string unprintable(std::string const& val) {
    std::string s;
    for (int i = 0; i < val.size(); i++) {
        char c = val[i];
        if (c == '\\') {
            if (++i == val.size())
                ASSERT(false);
            if (val[i] == '\\') {
                s += '\\';
            } else if (val[i] == 'x') {
                if (i + 2 >= val.size())
                    ASSERT(false);
                s += char((unhex(val[i + 1]) << 4) + unhex(val[i + 2]));
                i += 2;
            } else
                ASSERT(false);
        } else
            s += c;
    }
    return s;
}

void DatabaseContext::validateVersion(Version version) {
    // Version could be 0 if the INITIALIZE_NEW_DATABASE option is set. In that case, it is illegal to perform any
    // reads. We throw client_invalid_operation because the caller didn't directly set the version, so the
    // version_invalid error might be confusing.
    if (version == 0) {
        throw client_invalid_operation();
    }
    if (switchable && version < minAcceptableReadVersion) {
        TEST(true); // Attempted to read a version lower than any this client has seen from the current cluster
        throw transaction_too_old();
    }

    ASSERT(version > 0 || version == latestVersion);
}

void validateOptionValue(Optional<StringRef> value, bool shouldBePresent) {
    if (shouldBePresent && !value.present())
        throw invalid_option_value();
    if (!shouldBePresent && value.present() && value.get().size() > 0)
        throw invalid_option_value();
}

void dumpMutations(const MutationListRef& mutations) {
    for (auto m = mutations.begin(); m; ++m) {
        switch (m->type) {
        case MutationRef::SetValue:
            printf(" '%s' := '%s'\n", printable(m->param1).c_str(), printable(m->param2).c_str());
            break;
        case MutationRef::AddValue:
            printf(" '%s' += '%s'\n", printable(m->param1).c_str(), printable(m->param2).c_str());
            break;
        case MutationRef::ClearRange:
            printf(" Clear ['%s','%s')\n", printable(m->param1).c_str(), printable(m->param2).c_str());
            break;
        default:
            printf(" Unknown mutation %d('%s','%s')\n",
                   m->type,
                   printable(m->param1).c_str(),
                   printable(m->param2).c_str());
            break;
        }
    }
}

template <>
void addref(DatabaseContext* ptr) {
    ptr->addref();
}
template <>
void delref(DatabaseContext* ptr) {
    ptr->delref();
}
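
// Emits a "TransactionMetrics" trace event every SYSTEM_MONITOR_INTERVAL seconds with this client's transaction
// counters and latency distributions, then resets the sampled distributions for the next interval.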
ACTOR Future<Void> databaseLogger(DatabaseContext* cx) {
    state double lastLogged = 0;
    loop {
        wait(delay(CLIENT_KNOBS->SYSTEM_MONITOR_INTERVAL, TaskPriority::FlushTrace));
        TraceEvent ev("TransactionMetrics", cx->dbId);

        ev.detail("Elapsed", (lastLogged == 0) ? 0 : now() - lastLogged)
            .detail("Cluster",
                    cx->getConnectionFile() ? cx->getConnectionFile()->getConnectionString().clusterKeyName().toString()
                                            : "")
            .detail("Internal", cx->internal);

        cx->cc.logToTraceEvent(ev);

        ev.detail("MeanLatency", cx->latencies.mean())
            .detail("MedianLatency", cx->latencies.median())
            .detail("Latency90", cx->latencies.percentile(0.90))
            .detail("Latency98", cx->latencies.percentile(0.98))
            .detail("MaxLatency", cx->latencies.max())
            .detail("MeanRowReadLatency", cx->readLatencies.mean())
            .detail("MedianRowReadLatency", cx->readLatencies.median())
            .detail("MaxRowReadLatency", cx->readLatencies.max())
            .detail("MeanGRVLatency", cx->GRVLatencies.mean())
            .detail("MedianGRVLatency", cx->GRVLatencies.median())
            .detail("MaxGRVLatency", cx->GRVLatencies.max())
            .detail("MeanCommitLatency", cx->commitLatencies.mean())
            .detail("MedianCommitLatency", cx->commitLatencies.median())
            .detail("MaxCommitLatency", cx->commitLatencies.max())
            .detail("MeanMutationsPerCommit", cx->mutationsPerCommit.mean())
            .detail("MedianMutationsPerCommit", cx->mutationsPerCommit.median())
            .detail("MaxMutationsPerCommit", cx->mutationsPerCommit.max())
            .detail("MeanBytesPerCommit", cx->bytesPerCommit.mean())
            .detail("MedianBytesPerCommit", cx->bytesPerCommit.median())
            .detail("MaxBytesPerCommit", cx->bytesPerCommit.max());

        cx->latencies.clear();
        cx->readLatencies.clear();
        cx->GRVLatencies.clear();
        cx->commitLatencies.clear();
        cx->mutationsPerCommit.clear();
        cx->bytesPerCommit.clear();

        lastLogged = now();
    }
}

struct TrInfoChunk {
    ValueRef value;
    Key key;
};
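
// Commits the given chunks of sampled transaction info using SetVersionstampedKey mutations and adds the number of
// committed bytes to the client latency counter key. Gives up and rethrows after 10 failed attempts.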
ACTOR static Future<Void> transactionInfoCommitActor(Transaction* tr, std::vector<TrInfoChunk>* chunks) {
    state const Key clientLatencyAtomicCtr = CLIENT_LATENCY_INFO_CTR_PREFIX.withPrefix(fdbClientInfoPrefixRange.begin);
    state int retryCount = 0;
    loop {
        try {
            tr->reset();
            tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
            tr->setOption(FDBTransactionOptions::LOCK_AWARE);
            state Future<Standalone<StringRef>> vstamp = tr->getVersionstamp();
            int64_t numCommitBytes = 0;
            for (auto& chunk : *chunks) {
                tr->atomicOp(chunk.key, chunk.value, MutationRef::SetVersionstampedKey);
                numCommitBytes += chunk.key.size() + chunk.value.size() -
                                  4; // subtract the number of bytes of the key that denote the versionstamp index
            }
            tr->atomicOp(clientLatencyAtomicCtr, StringRef((uint8_t*)&numCommitBytes, 8), MutationRef::AddValue);
            wait(tr->commit());
            return Void();
        } catch (Error& e) {
            retryCount++;
            if (retryCount == 10)
                throw;
            wait(tr->onError(e));
        }
    }
}
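
// Reads the client latency byte counter and, while the stored transaction info exceeds clientTxInfoSizeLimit,
// clears the oldest entries and decrements the counter until the total size is back under the limit.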
ACTOR static Future<Void> delExcessClntTxnEntriesActor(Transaction* tr, int64_t clientTxInfoSizeLimit) {
    state const Key clientLatencyName = CLIENT_LATENCY_INFO_PREFIX.withPrefix(fdbClientInfoPrefixRange.begin);
    state const Key clientLatencyAtomicCtr = CLIENT_LATENCY_INFO_CTR_PREFIX.withPrefix(fdbClientInfoPrefixRange.begin);
    TraceEvent(SevInfo, "DelExcessClntTxnEntriesCalled");
    loop {
        try {
            tr->reset();
            tr->setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
            tr->setOption(FDBTransactionOptions::LOCK_AWARE);
            Optional<Value> ctrValue = wait(tr->get(KeyRef(clientLatencyAtomicCtr), true));
            if (!ctrValue.present()) {
                TraceEvent(SevInfo, "NumClntTxnEntriesNotFound");
                return Void();
            }
            state int64_t txInfoSize = 0;
            ASSERT(ctrValue.get().size() == sizeof(int64_t));
            memcpy(&txInfoSize, ctrValue.get().begin(), ctrValue.get().size());
            if (txInfoSize < clientTxInfoSizeLimit)
                return Void();
            int getRangeByteLimit = (txInfoSize - clientTxInfoSizeLimit) < CLIENT_KNOBS->TRANSACTION_SIZE_LIMIT
                                        ? (txInfoSize - clientTxInfoSizeLimit)
                                        : CLIENT_KNOBS->TRANSACTION_SIZE_LIMIT;
            GetRangeLimits limit(GetRangeLimits::ROW_LIMIT_UNLIMITED, getRangeByteLimit);
            RangeResult txEntries =
                wait(tr->getRange(KeyRangeRef(clientLatencyName, strinc(clientLatencyName)), limit));
            state int64_t numBytesToDel = 0;
            KeyRef endKey;
            for (auto& kv : txEntries) {
                endKey = kv.key;
                numBytesToDel += kv.key.size() + kv.value.size();
                if (txInfoSize - numBytesToDel <= clientTxInfoSizeLimit)
                    break;
            }
            if (numBytesToDel) {
                tr->clear(KeyRangeRef(txEntries[0].key, strinc(endKey)));
                TraceEvent(SevInfo, "DeletingExcessCntTxnEntries").detail("BytesToBeDeleted", numBytesToDel);
                int64_t bytesDel = -numBytesToDel;
                tr->atomicOp(clientLatencyAtomicCtr, StringRef((uint8_t*)&bytesDel, 8), MutationRef::AddValue);
                wait(tr->commit());
            }
            if (txInfoSize - numBytesToDel <= clientTxInfoSizeLimit)
                return Void();
        } catch (Error& e) {
            wait(tr->onError(e));
        }
    }
}

// Resets *tr and then rebuilds it on self. Dropping the old transaction's reference to self before taking a new one
// gives self a chance to get destroyed in between.
ACTOR static Future<Void> refreshTransaction(DatabaseContext* self, Transaction* tr) {
    *tr = Transaction();
    wait(delay(0)); // Give ourselves the chance to get cancelled if self was destroyed
    *tr = Transaction(Database(Reference<DatabaseContext>::addRef(self)));
    return Void();
}
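
// Each sampled transaction's serialized info is split into chunks of at most VALUE_SIZE_LIMIT bytes. A rough sketch
// of the chunk key layout, as implied by the construction below (the 10 zero bytes are overwritten at commit time by
// the versionstamp via SetVersionstampedKey, and the trailing 4 bytes encode the versionstamp's offset in the key):
//   client_latency/ + <10-byte versionstamp> + "/" + <random_id> + "/" + <chunk#,total#> + "/" + <user id?> + <4-byte offset>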
// The reason for getting a pointer to DatabaseContext instead of a reference counted object is because reference
// counting will increment reference count for DatabaseContext which holds the future of this actor. This creates a
// cyclic reference and hence this actor and Database object will not be destroyed at all.
ACTOR static Future<Void> clientStatusUpdateActor(DatabaseContext* cx) {
    state const std::string clientLatencyName =
        CLIENT_LATENCY_INFO_PREFIX.withPrefix(fdbClientInfoPrefixRange.begin).toString();
    state Transaction tr;
    state std::vector<TrInfoChunk> commitQ;
    state int txBytes = 0;

    loop {
        // Need to make sure that we eventually destroy tr. We can't rely on getting cancelled to do this because of
        // the cyclic reference to self.
        wait(refreshTransaction(cx, &tr));
        try {
            ASSERT(cx->clientStatusUpdater.outStatusQ.empty());
            cx->clientStatusUpdater.inStatusQ.swap(cx->clientStatusUpdater.outStatusQ);
            // Split Transaction Info into chunks
            state std::vector<TrInfoChunk> trChunksQ;
            for (auto& entry : cx->clientStatusUpdater.outStatusQ) {
                auto& bw = entry.second;
                int64_t value_size_limit = BUGGIFY
                                               ? deterministicRandom()->randomInt(1e3, CLIENT_KNOBS->VALUE_SIZE_LIMIT)
                                               : CLIENT_KNOBS->VALUE_SIZE_LIMIT;
                int num_chunks = (bw.getLength() + value_size_limit - 1) / value_size_limit;
                std::string random_id = deterministicRandom()->randomAlphaNumeric(16);
                std::string user_provided_id = entry.first.size() ? entry.first + "/" : "";
                for (int i = 0; i < num_chunks; i++) {
                    TrInfoChunk chunk;
                    BinaryWriter chunkBW(Unversioned());
                    chunkBW << bigEndian32(i + 1) << bigEndian32(num_chunks);
                    chunk.key = KeyRef(clientLatencyName + std::string(10, '\x00') + "/" + random_id + "/" +
                                       chunkBW.toValue().toString() + "/" + user_provided_id + std::string(4, '\x00'));
                    int32_t pos = littleEndian32(clientLatencyName.size());
                    memcpy(mutateString(chunk.key) + chunk.key.size() - sizeof(int32_t), &pos, sizeof(int32_t));
                    if (i == num_chunks - 1) {
                        chunk.value = ValueRef(static_cast<uint8_t*>(bw.getData()) + (i * value_size_limit),
                                               bw.getLength() - (i * value_size_limit));
                    } else {
                        chunk.value =
                            ValueRef(static_cast<uint8_t*>(bw.getData()) + (i * value_size_limit), value_size_limit);
                    }
                    trChunksQ.push_back(std::move(chunk));
                }
            }

            // Commit the chunks splitting into different transactions if needed
            state int64_t dataSizeLimit =
                BUGGIFY ? deterministicRandom()->randomInt(200e3, 1.5 * CLIENT_KNOBS->TRANSACTION_SIZE_LIMIT)
                        : 0.8 * CLIENT_KNOBS->TRANSACTION_SIZE_LIMIT;
            state std::vector<TrInfoChunk>::iterator tracking_iter = trChunksQ.begin();
            ASSERT(commitQ.empty() && (txBytes == 0));
            loop {
                state std::vector<TrInfoChunk>::iterator iter = tracking_iter;
                txBytes = 0;
                commitQ.clear();
                try {
                    while (iter != trChunksQ.end()) {
                        if (iter->value.size() + iter->key.size() + txBytes > dataSizeLimit) {
                            wait(transactionInfoCommitActor(&tr, &commitQ));
                            tracking_iter = iter;
                            commitQ.clear();
                            txBytes = 0;
                        }
                        commitQ.push_back(*iter);
                        txBytes += iter->value.size() + iter->key.size();
                        ++iter;
                    }
                    if (!commitQ.empty()) {
                        wait(transactionInfoCommitActor(&tr, &commitQ));
                        commitQ.clear();
                        txBytes = 0;
                    }
                    break;
                } catch (Error& e) {
                    if (e.code() == error_code_transaction_too_large) {
                        dataSizeLimit /= 2;
                        ASSERT(dataSizeLimit >= CLIENT_KNOBS->VALUE_SIZE_LIMIT + CLIENT_KNOBS->KEY_SIZE_LIMIT);
                    } else {
                        TraceEvent(SevWarnAlways, "ClientTrInfoErrorCommit").error(e).detail("TxBytes", txBytes);
                        commitQ.clear();
                        txBytes = 0;
                        throw;
                    }
                }
            }
            cx->clientStatusUpdater.outStatusQ.clear();
            wait(GlobalConfig::globalConfig().onInitialized());
            double sampleRate = GlobalConfig::globalConfig().get<double>(fdbClientInfoTxnSampleRate,
                                                                         std::numeric_limits<double>::infinity());
            double clientSamplingProbability =
                std::isinf(sampleRate) ? CLIENT_KNOBS->CSI_SAMPLING_PROBABILITY : sampleRate;
            int64_t sizeLimit = GlobalConfig::globalConfig().get<int64_t>(fdbClientInfoTxnSizeLimit, -1);
            int64_t clientTxnInfoSizeLimit = sizeLimit == -1 ? CLIENT_KNOBS->CSI_SIZE_LIMIT : sizeLimit;
            if (!trChunksQ.empty() && deterministicRandom()->random01() < clientSamplingProbability)
                wait(delExcessClntTxnEntriesActor(&tr, clientTxnInfoSizeLimit));

            wait(delay(CLIENT_KNOBS->CSI_STATUS_DELAY));
        } catch (Error& e) {
            if (e.code() == error_code_actor_cancelled) {
                throw;
            }
            cx->clientStatusUpdater.outStatusQ.clear();
            TraceEvent(SevWarnAlways, "UnableToWriteClientStatus").error(e);
            wait(delay(10.0));
        }
    }
}

ACTOR static Future<Void> monitorProxiesChange(Reference<AsyncVar<ClientDBInfo>> clientDBInfo,
                                               AsyncTrigger* triggerVar) {
    state vector<CommitProxyInterface> curCommitProxies;
    state vector<GrvProxyInterface> curGrvProxies;
    curCommitProxies = clientDBInfo->get().commitProxies;
    curGrvProxies = clientDBInfo->get().grvProxies;

    loop {
        wait(clientDBInfo->onChange());
        if (clientDBInfo->get().commitProxies != curCommitProxies || clientDBInfo->get().grvProxies != curGrvProxies) {
            curCommitProxies = clientDBInfo->get().commitProxies;
            curGrvProxies = clientDBInfo->get().grvProxies;
            triggerVar->trigger();
        }
    }
}
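
// Rebuilds every entry in the location cache that includes storage caches, dropping the interfaces in `removed`
// and appending the interfaces in `added`.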
void updateLocationCacheWithCaches(DatabaseContext* self,
                                   const std::map<UID, StorageServerInterface>& removed,
                                   const std::map<UID, StorageServerInterface>& added) {
    // TODO: this needs to be more clever in the future
    auto ranges = self->locationCache.ranges();
    for (auto iter = ranges.begin(); iter != ranges.end(); ++iter) {
        if (iter->value() && iter->value()->hasCaches) {
            auto& val = iter->value();
            std::vector<Reference<ReferencedInterface<StorageServerInterface>>> interfaces;
            interfaces.reserve(val->size() - removed.size() + added.size());
            for (int i = 0; i < val->size(); ++i) {
                const auto& interf = (*val)[i];
                if (removed.count(interf->interf.id()) == 0) {
                    interfaces.emplace_back(interf);
                }
            }
            for (const auto& p : added) {
                interfaces.push_back(makeReference<ReferencedInterface<StorageServerInterface>>(p.second));
            }
            iter->value() = makeReference<LocationInfo>(interfaces, true);
        }
    }
}
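
// Returns a new LocationInfo combining the interfaces in loc with the given cache interfaces, marked as having
// caches.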
Reference<LocationInfo> addCaches(const Reference<LocationInfo>& loc,
                                  const std::vector<Reference<ReferencedInterface<StorageServerInterface>>>& other) {
    std::vector<Reference<ReferencedInterface<StorageServerInterface>>> interfaces;
    interfaces.reserve(loc->size() + other.size());
    for (int i = 0; i < loc->size(); ++i) {
        interfaces.emplace_back((*loc)[i]);
    }
    interfaces.insert(interfaces.end(), other.begin(), other.end());
    return makeReference<LocationInfo>(interfaces, true);
}

ACTOR Future<Void> updateCachedRanges(DatabaseContext* self, std::map<UID, StorageServerInterface>* cacheServers) {
    state Transaction tr;
    state Value trueValue = storageCacheValue(std::vector<uint16_t>{ 0 });
    state Value falseValue = storageCacheValue(std::vector<uint16_t>{});
    try {
        loop {
            // Need to make sure that we eventually destroy tr. We can't rely on getting cancelled to do this because
            // of the cyclic reference to self.
            tr = Transaction();
            wait(delay(0)); // Give ourselves the chance to get cancelled if self was destroyed
            wait(brokenPromiseToNever(self->updateCache.onTrigger())); // brokenPromiseToNever because self might get
                                                                       // destroyed elsewhere while we're waiting here.
            tr = Transaction(Database(Reference<DatabaseContext>::addRef(self)));
            tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
            tr.setOption(FDBTransactionOptions::READ_LOCK_AWARE);
            try {
                RangeResult range = wait(tr.getRange(storageCacheKeys, CLIENT_KNOBS->TOO_MANY));
                ASSERT(!range.more);
                std::vector<Reference<ReferencedInterface<StorageServerInterface>>> cacheInterfaces;
                cacheInterfaces.reserve(cacheServers->size());
                for (const auto& p : *cacheServers) {
                    cacheInterfaces.push_back(makeReference<ReferencedInterface<StorageServerInterface>>(p.second));
                }
                bool currCached = false;
                KeyRef begin, end;
                for (const auto& kv : range) {
                    // These booleans have to flip consistently
                    ASSERT(currCached == (kv.value == falseValue));
                    if (kv.value == trueValue) {
                        begin = kv.key.substr(storageCacheKeys.begin.size());
                        currCached = true;
                    } else {
                        currCached = false;
                        end = kv.key.substr(storageCacheKeys.begin.size());
                        KeyRangeRef cachedRange{ begin, end };
                        auto ranges = self->locationCache.containedRanges(cachedRange);
                        KeyRef containedRangesBegin, containedRangesEnd, prevKey;
                        if (!ranges.empty()) {
                            containedRangesBegin = ranges.begin().range().begin;
                        }
                        for (auto iter = ranges.begin(); iter != ranges.end(); ++iter) {
                            containedRangesEnd = iter->range().end;
                            if (iter->value() && !iter->value()->hasCaches) {
                                iter->value() = addCaches(iter->value(), cacheInterfaces);
                            }
                        }
                        auto iter = self->locationCache.rangeContaining(begin);
                        if (iter->value() && !iter->value()->hasCaches) {
                            if (end >= iter->range().end) {
                                Key endCopy = iter->range().end; // Copy because insertion invalidates iterator
                                self->locationCache.insert(KeyRangeRef{ begin, endCopy },
                                                           addCaches(iter->value(), cacheInterfaces));
                            } else {
                                self->locationCache.insert(KeyRangeRef{ begin, end },
                                                           addCaches(iter->value(), cacheInterfaces));
                            }
                        }
                        iter = self->locationCache.rangeContainingKeyBefore(end);
                        if (iter->value() && !iter->value()->hasCaches) {
                            Key beginCopy = iter->range().begin; // Copy because insertion invalidates iterator
                            self->locationCache.insert(KeyRangeRef{ beginCopy, end },
                                                       addCaches(iter->value(), cacheInterfaces));
                        }
                    }
                }
                wait(delay(2.0)); // we want to wait at least some small amount of time before
                                  // updating this list again
            } catch (Error& e) {
                wait(tr.onError(e));
            }
        }
    } catch (Error& e) {
        TraceEvent(SevError, "UpdateCachedRangesFailed").error(e);
        throw;
    }
}

// The reason for getting a pointer to DatabaseContext instead of a reference counted object is because reference
// counting will increment reference count for DatabaseContext which holds the future of this actor. This creates a
// cyclic reference and hence this actor and Database object will not be destroyed at all.
ACTOR Future<Void> monitorCacheList(DatabaseContext* self) {
    state Transaction tr;
    state std::map<UID, StorageServerInterface> cacheServerMap;
    state Future<Void> updateRanges = updateCachedRanges(self, &cacheServerMap);
    // If no caches are configured, we don't want to run this actor at all, so we just wait for the first trigger
    // from a storage server.
    wait(self->updateCache.onTrigger());
    try {
        loop {
            // Need to make sure that we eventually destroy tr. We can't rely on getting cancelled to do this because
            // of the cyclic reference to self.
            wait(refreshTransaction(self, &tr));
            try {
                RangeResult cacheList = wait(tr.getRange(storageCacheServerKeys, CLIENT_KNOBS->TOO_MANY));
                ASSERT(!cacheList.more);
                bool hasChanges = false;
                std::map<UID, StorageServerInterface> allCacheServers;
                for (auto kv : cacheList) {
                    auto ssi = BinaryReader::fromStringRef<StorageServerInterface>(kv.value, IncludeVersion());
                    allCacheServers.emplace(ssi.id(), ssi);
                }
                std::map<UID, StorageServerInterface> newCacheServers;
                std::map<UID, StorageServerInterface> deletedCacheServers;
                std::set_difference(allCacheServers.begin(),
                                    allCacheServers.end(),
                                    cacheServerMap.begin(),
                                    cacheServerMap.end(),
                                    std::insert_iterator<std::map<UID, StorageServerInterface>>(
                                        newCacheServers, newCacheServers.begin()));
                std::set_difference(cacheServerMap.begin(),
                                    cacheServerMap.end(),
                                    allCacheServers.begin(),
                                    allCacheServers.end(),
                                    std::insert_iterator<std::map<UID, StorageServerInterface>>(
                                        deletedCacheServers, deletedCacheServers.begin()));
                hasChanges = !(newCacheServers.empty() && deletedCacheServers.empty());
                if (hasChanges) {
                    updateLocationCacheWithCaches(self, deletedCacheServers, newCacheServers);
                }
                cacheServerMap = std::move(allCacheServers);
                wait(delay(5.0));
            } catch (Error& e) {
                wait(tr.onError(e));
            }
        }
    } catch (Error& e) {
        TraceEvent(SevError, "MonitorCacheListFailed").error(e);
        throw;
    }
}
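
// Serves health metrics from the client-side cache while they are fresher than the staleness knobs allow;
// otherwise fetches updated metrics from a GRV proxy.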
ACTOR static Future<HealthMetrics> getHealthMetricsActor(DatabaseContext* cx, bool detailed) {
    if (now() - cx->healthMetricsLastUpdated < CLIENT_KNOBS->AGGREGATE_HEALTH_METRICS_MAX_STALENESS) {
        if (detailed) {
            return cx->healthMetrics;
        } else {
            HealthMetrics result;
            result.update(cx->healthMetrics, false, false);
            return result;
        }
    }
    state bool sendDetailedRequest =
        detailed && now() - cx->detailedHealthMetricsLastUpdated > CLIENT_KNOBS->DETAILED_HEALTH_METRICS_MAX_STALENESS;
    loop {
        choose {
            when(wait(cx->onProxiesChanged())) {}
            when(GetHealthMetricsReply rep = wait(basicLoadBalance(cx->getGrvProxies(false),
                                                                   &GrvProxyInterface::getHealthMetrics,
                                                                   GetHealthMetricsRequest(sendDetailedRequest)))) {
                cx->healthMetrics.update(rep.healthMetrics, detailed, true);
                if (detailed) {
                    cx->healthMetricsLastUpdated = now();
                    cx->detailedHealthMetricsLastUpdated = now();
                    return cx->healthMetrics;
                } else {
                    cx->healthMetricsLastUpdated = now();
                    HealthMetrics result;
                    result.update(cx->healthMetrics, false, false);
                    return result;
                }
            }
        }
    }
}

Future<HealthMetrics> DatabaseContext::getHealthMetrics(bool detailed = false) {
    return getHealthMetricsActor(this, detailed);
}

void DatabaseContext::registerSpecialKeySpaceModule(SpecialKeySpace::MODULE module,
                                                    SpecialKeySpace::IMPLTYPE type,
                                                    std::unique_ptr<SpecialKeyRangeReadImpl>&& impl) {
    specialKeySpace->registerKeyRange(module, type, impl->getKeyRange(), impl.get());
    specialKeySpaceModules.push_back(std::move(impl));
}

ACTOR Future<RangeResult> getWorkerInterfaces(Reference<ClusterConnectionFile> clusterFile);
ACTOR Future<Optional<Value>> getJSON(Database db);

struct WorkerInterfacesSpecialKeyImpl : SpecialKeyRangeReadImpl {
    Future<RangeResult> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override {
        if (ryw->getDatabase().getPtr() && ryw->getDatabase()->getConnectionFile()) {
            Key prefix = Key(getKeyRange().begin);
            return map(getWorkerInterfaces(ryw->getDatabase()->getConnectionFile()),
                       [prefix = prefix, kr = KeyRange(kr)](const RangeResult& in) {
                           RangeResult result;
                           for (const auto& [k_, v] : in) {
                               auto k = k_.withPrefix(prefix);
                               if (kr.contains(k))
                                   result.push_back_deep(result.arena(), KeyValueRef(k, v));
                           }

                           std::sort(result.begin(), result.end(), KeyValueRef::OrderByKey{});
                           return result;
                       });
        } else {
            return RangeResult();
        }
    }

    explicit WorkerInterfacesSpecialKeyImpl(KeyRangeRef kr) : SpecialKeyRangeReadImpl(kr) {}
};
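
// A read-only special key backed by a single key whose value is produced on demand by the supplied function f.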
struct SingleSpecialKeyImpl : SpecialKeyRangeReadImpl {
    Future<RangeResult> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override {
        ASSERT(kr.contains(k));
        return map(f(ryw), [k = k](Optional<Value> v) {
            RangeResult result;
            if (v.present()) {
                result.push_back_deep(result.arena(), KeyValueRef(k, v.get()));
            }
            return result;
        });
    }

    SingleSpecialKeyImpl(KeyRef k, const std::function<Future<Optional<Value>>(ReadYourWritesTransaction*)>& f)
      : SpecialKeyRangeReadImpl(singleKeyRange(k)), k(k), f(f) {}

private:
    Key k;
    std::function<Future<Optional<Value>>(ReadYourWritesTransaction*)> f;
};

class HealthMetricsRangeImpl : public SpecialKeyRangeAsyncImpl {
public:
    explicit HealthMetricsRangeImpl(KeyRangeRef kr);
    Future<RangeResult> getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const override;
};
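
// Flattens HealthMetrics into the \xff\xff/metrics/health/ special key layout: an "aggregate" key carrying
// cluster-wide stats as JSON, plus one JSON value per tlog under .../log/<uid> and per storage server under
// .../storage/<uid>, restricted to the requested key range.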
static RangeResult healthMetricsToKVPairs(const HealthMetrics& metrics, KeyRangeRef kr) {
    RangeResult result;
    if (CLIENT_BUGGIFY)
        return result;
    if (kr.contains(LiteralStringRef("\xff\xff/metrics/health/aggregate")) && metrics.worstStorageDurabilityLag != 0) {
        json_spirit::mObject statsObj;
        statsObj["batch_limited"] = metrics.batchLimited;
        statsObj["tps_limit"] = metrics.tpsLimit;
        statsObj["worst_storage_durability_lag"] = metrics.worstStorageDurabilityLag;
        statsObj["limiting_storage_durability_lag"] = metrics.limitingStorageDurabilityLag;
        statsObj["worst_storage_queue"] = metrics.worstStorageQueue;
        statsObj["limiting_storage_queue"] = metrics.limitingStorageQueue;
        statsObj["worst_log_queue"] = metrics.worstTLogQueue;
        std::string statsString =
            json_spirit::write_string(json_spirit::mValue(statsObj), json_spirit::Output_options::raw_utf8);
        ValueRef bytes(result.arena(), statsString);
        result.push_back(result.arena(), KeyValueRef(LiteralStringRef("\xff\xff/metrics/health/aggregate"), bytes));
    }
    // tlog stats
    {
        int phase = 0; // Avoid comparing twice per loop iteration
        for (const auto& [uid, logStats] : metrics.tLogQueue) {
            StringRef k{
                StringRef(uid.toString()).withPrefix(LiteralStringRef("\xff\xff/metrics/health/log/"), result.arena())
            };
            if (phase == 0 && k >= kr.begin) {
                phase = 1;
            }
            if (phase == 1) {
                if (k < kr.end) {
                    json_spirit::mObject statsObj;
                    statsObj["log_queue"] = logStats;
                    std::string statsString =
                        json_spirit::write_string(json_spirit::mValue(statsObj), json_spirit::Output_options::raw_utf8);
                    ValueRef bytes(result.arena(), statsString);
                    result.push_back(result.arena(), KeyValueRef(k, bytes));
                } else {
                    break;
                }
            }
        }
    }
    // Storage stats
    {
        int phase = 0; // Avoid comparing twice per loop iteration
        for (const auto& [uid, storageStats] : metrics.storageStats) {
            StringRef k{ StringRef(uid.toString())
                             .withPrefix(LiteralStringRef("\xff\xff/metrics/health/storage/"), result.arena()) };
            if (phase == 0 && k >= kr.begin) {
                phase = 1;
            }
            if (phase == 1) {
                if (k < kr.end) {
                    json_spirit::mObject statsObj;
                    statsObj["storage_durability_lag"] = storageStats.storageDurabilityLag;
                    statsObj["storage_queue"] = storageStats.storageQueue;
                    statsObj["cpu_usage"] = storageStats.cpuUsage;
                    statsObj["disk_usage"] = storageStats.diskUsage;
                    std::string statsString =
                        json_spirit::write_string(json_spirit::mValue(statsObj), json_spirit::Output_options::raw_utf8);
                    ValueRef bytes(result.arena(), statsString);
                    result.push_back(result.arena(), KeyValueRef(k, bytes));
                } else {
                    break;
                }
            }
        }
    }
    return result;
}

ACTOR static Future<RangeResult> healthMetricsGetRangeActor(ReadYourWritesTransaction* ryw, KeyRangeRef kr) {
    HealthMetrics metrics = wait(ryw->getDatabase()->getHealthMetrics(
        /*detailed ("per process")*/ kr.intersects(KeyRangeRef(LiteralStringRef("\xff\xff/metrics/health/storage/"),
                                                               LiteralStringRef("\xff\xff/metrics/health/storage0"))) ||
        kr.intersects(KeyRangeRef(LiteralStringRef("\xff\xff/metrics/health/log/"),
                                  LiteralStringRef("\xff\xff/metrics/health/log0")))));
    return healthMetricsToKVPairs(metrics, kr);
}

HealthMetricsRangeImpl::HealthMetricsRangeImpl(KeyRangeRef kr) : SpecialKeyRangeAsyncImpl(kr) {}

Future<RangeResult> HealthMetricsRangeImpl::getRange(ReadYourWritesTransaction* ryw, KeyRangeRef kr) const {
    return healthMetricsGetRangeActor(ryw, kr);
}
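
// The DatabaseContext constructor wires up the client-side machinery: the transaction metric counters, the location
// and metadata-version caches, the background logger, client status updater, and cache monitor actors, and (for API
// version 700 and above) the special key space modules.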
2020-04-09 05:33:41 +08:00
|
|
|
DatabaseContext::DatabaseContext(Reference<AsyncVar<Reference<ClusterConnectionFile>>> connectionFile,
|
2021-03-11 02:06:03 +08:00
|
|
|
Reference<AsyncVar<ClientDBInfo>> clientInfo,
|
2021-04-16 02:45:14 +08:00
|
|
|
Reference<AsyncVar<Optional<ClientLeaderRegInterface>>> coordinator,
|
2021-03-11 02:06:03 +08:00
|
|
|
Future<Void> clientInfoMonitor,
|
|
|
|
TaskPriority taskID,
|
|
|
|
LocalityData const& clientLocality,
|
|
|
|
bool enableLocalityLoadBalance,
|
|
|
|
bool lockAware,
|
|
|
|
bool internal,
|
|
|
|
int apiVersion,
|
2020-04-09 05:33:41 +08:00
|
|
|
bool switchable)
|
2021-04-16 02:45:14 +08:00
|
|
|
: connectionFile(connectionFile), clientInfo(clientInfo), coordinator(coordinator),
|
|
|
|
clientInfoMonitor(clientInfoMonitor), taskID(taskID), clientLocality(clientLocality),
|
|
|
|
enableLocalityLoadBalance(enableLocalityLoadBalance), lockAware(lockAware), apiVersion(apiVersion),
|
|
|
|
switchable(switchable), proxyProvisional(false), cc("TransactionMetrics"),
|
2020-08-30 03:35:31 +08:00
|
|
|
transactionReadVersions("ReadVersions", cc), transactionReadVersionsThrottled("ReadVersionsThrottled", cc),
|
2020-04-28 04:05:24 +08:00
|
|
|
transactionReadVersionsCompleted("ReadVersionsCompleted", cc),
|
2020-04-09 05:33:41 +08:00
|
|
|
transactionReadVersionBatches("ReadVersionBatches", cc),
|
|
|
|
transactionBatchReadVersions("BatchPriorityReadVersions", cc),
|
|
|
|
transactionDefaultReadVersions("DefaultPriorityReadVersions", cc),
|
|
|
|
transactionImmediateReadVersions("ImmediatePriorityReadVersions", cc),
|
|
|
|
transactionBatchReadVersionsCompleted("BatchPriorityReadVersionsCompleted", cc),
|
|
|
|
transactionDefaultReadVersionsCompleted("DefaultPriorityReadVersionsCompleted", cc),
|
|
|
|
transactionImmediateReadVersionsCompleted("ImmediatePriorityReadVersionsCompleted", cc),
|
|
|
|
transactionLogicalReads("LogicalUncachedReads", cc), transactionPhysicalReads("PhysicalReadRequests", cc),
|
|
|
|
transactionPhysicalReadsCompleted("PhysicalReadRequestsCompleted", cc),
|
|
|
|
transactionGetKeyRequests("GetKeyRequests", cc), transactionGetValueRequests("GetValueRequests", cc),
|
|
|
|
transactionGetRangeRequests("GetRangeRequests", cc), transactionWatchRequests("WatchRequests", cc),
|
|
|
|
transactionGetAddressesForKeyRequests("GetAddressesForKeyRequests", cc), transactionBytesRead("BytesRead", cc),
|
|
|
|
transactionKeysRead("KeysRead", cc), transactionMetadataVersionReads("MetadataVersionReads", cc),
|
|
|
|
transactionCommittedMutations("CommittedMutations", cc),
|
|
|
|
transactionCommittedMutationBytes("CommittedMutationBytes", cc), transactionSetMutations("SetMutations", cc),
|
|
|
|
transactionClearMutations("ClearMutations", cc), transactionAtomicMutations("AtomicMutations", cc),
|
|
|
|
transactionsCommitStarted("CommitStarted", cc), transactionsCommitCompleted("CommitCompleted", cc),
|
|
|
|
transactionKeyServerLocationRequests("KeyServerLocationRequests", cc),
|
|
|
|
transactionKeyServerLocationRequestsCompleted("KeyServerLocationRequestsCompleted", cc),
|
2021-01-15 04:13:29 +08:00
|
|
|
transactionStatusRequests("StatusRequests", cc), transactionsTooOld("TooOld", cc),
|
|
|
|
transactionsFutureVersions("FutureVersions", cc), transactionsNotCommitted("NotCommitted", cc),
|
|
|
|
transactionsMaybeCommitted("MaybeCommitted", cc), transactionsResourceConstrained("ResourceConstrained", cc),
|
|
|
|
transactionsThrottled("Throttled", cc), transactionsProcessBehind("ProcessBehind", cc), outstandingWatches(0),
|
|
|
|
latencies(1000), readLatencies(1000), commitLatencies(1000), GRVLatencies(1000), mutationsPerCommit(1000),
|
|
|
|
bytesPerCommit(1000), mvCacheInsertLocation(0), healthMetricsLastUpdated(0), detailedHealthMetricsLastUpdated(0),
|
2021-01-16 03:35:10 +08:00
|
|
|
internal(internal), transactionTracingEnabled(true), smoothMidShardSize(CLIENT_KNOBS->SHARD_STAT_SMOOTH_AMOUNT),
|
2020-08-30 03:35:31 +08:00
|
|
|
transactionsExpensiveClearCostEstCount("ExpensiveClearCostEstCount", cc),
|
2020-05-15 08:30:48 +08:00
|
|
|
specialKeySpace(std::make_unique<SpecialKeySpace>(specialKeys.begin, specialKeys.end, /* test */ false)) {
|
2019-07-09 05:01:04 +08:00
|
|
|
dbId = deterministicRandom()->randomUniqueID();
|
2020-09-11 08:44:15 +08:00
|
|
|
connected = (clientInfo->get().commitProxies.size() && clientInfo->get().grvProxies.size())
|
2020-08-13 01:34:07 +08:00
|
|
|
? Void()
|
|
|
|
: clientInfo->onChange();
|
2019-07-09 05:01:04 +08:00
|
|
|
|
2019-03-01 09:45:00 +08:00
|
|
|
metadataVersionCache.resize(CLIENT_KNOBS->METADATA_VERSION_CACHE_SIZE);
|
2018-09-22 06:58:14 +08:00
|
|
|
maxOutstandingWatches = CLIENT_KNOBS->DEFAULT_MAX_OUTSTANDING_WATCHES;
|
|
|
|
|
2019-08-09 06:00:33 +08:00
|
|
|
snapshotRywEnabled = apiVersionAtLeast(300) ? 1 : 0;
|
2019-03-20 00:15:41 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
logger = databaseLogger(this);
|
|
|
|
locationCacheSize = g_network->isSimulated() ? CLIENT_KNOBS->LOCATION_CACHE_EVICTION_SIZE_SIM
|
|
|
|
: CLIENT_KNOBS->LOCATION_CACHE_EVICTION_SIZE;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
getValueSubmitted.init(LiteralStringRef("NativeAPI.GetValueSubmitted"));
|
|
|
|
getValueCompleted.init(LiteralStringRef("NativeAPI.GetValueCompleted"));
|
|
|
|
|
2021-02-24 08:17:05 +08:00
|
|
|
GlobalConfig::create(this, clientInfo);
|
2021-04-21 08:51:38 +08:00
|
|
|
GlobalConfig::globalConfig().trigger(samplingFrequency, samplingProfilerUpdateFrequency);
|
2021-04-24 05:05:05 +08:00
|
|
|
GlobalConfig::globalConfig().trigger(samplingWindow, samplingProfilerUpdateWindow);
|
2021-02-24 08:17:05 +08:00
|
|
|
|
2020-07-23 03:20:22 +08:00
|
|
|
monitorProxiesInfoChange = monitorProxiesChange(clientInfo, &proxiesChangeTrigger);
|
2017-05-26 04:48:44 +08:00
|
|
|
clientStatusUpdater.actor = clientStatusUpdateActor(this);
|
2020-01-08 03:42:57 +08:00
|
|
|
cacheListMonitor = monitorCacheList(this);
|
2020-08-01 02:13:05 +08:00
|
|
|
|
2020-08-05 11:57:25 +08:00
|
|
|
smoothMidShardSize.reset(CLIENT_KNOBS->INIT_MID_SHARD_BYTES);
|
2020-08-01 02:13:05 +08:00
|
|
|
|
2020-07-07 02:02:48 +08:00
|
|
|
    if (apiVersionAtLeast(700)) {
        registerSpecialKeySpaceModule(SpecialKeySpace::MODULE::ERRORMSG,
                                      SpecialKeySpace::IMPLTYPE::READONLY,
                                      std::make_unique<SingleSpecialKeyImpl>(
                                          SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::ERRORMSG).begin,
                                          [](ReadYourWritesTransaction* ryw) -> Future<Optional<Value>> {
                                              if (ryw->getSpecialKeySpaceErrorMsg().present())
                                                  return Optional<Value>(ryw->getSpecialKeySpaceErrorMsg().get());
                                              else
                                                  return Optional<Value>();
                                          }));
        registerSpecialKeySpaceModule(
            SpecialKeySpace::MODULE::MANAGEMENT,
            SpecialKeySpace::IMPLTYPE::READWRITE,
            std::make_unique<ManagementCommandsOptionsImpl>(
                KeyRangeRef(LiteralStringRef("options/"), LiteralStringRef("options0"))
                    .withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin)));
        registerSpecialKeySpaceModule(
            SpecialKeySpace::MODULE::MANAGEMENT,
            SpecialKeySpace::IMPLTYPE::READWRITE,
            std::make_unique<ExcludeServersRangeImpl>(SpecialKeySpace::getManamentApiCommandRange("exclude")));
        registerSpecialKeySpaceModule(
            SpecialKeySpace::MODULE::MANAGEMENT,
            SpecialKeySpace::IMPLTYPE::READWRITE,
            std::make_unique<FailedServersRangeImpl>(SpecialKeySpace::getManamentApiCommandRange("failed")));
        registerSpecialKeySpaceModule(
            SpecialKeySpace::MODULE::MANAGEMENT,
            SpecialKeySpace::IMPLTYPE::READONLY,
            std::make_unique<ExclusionInProgressRangeImpl>(
                KeyRangeRef(LiteralStringRef("in_progress_exclusion/"), LiteralStringRef("in_progress_exclusion0"))
                    .withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin)));
        registerSpecialKeySpaceModule(
            SpecialKeySpace::MODULE::CONFIGURATION,
            SpecialKeySpace::IMPLTYPE::READWRITE,
            std::make_unique<ProcessClassRangeImpl>(
                KeyRangeRef(LiteralStringRef("process/class_type/"), LiteralStringRef("process/class_type0"))
                    .withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::CONFIGURATION).begin)));
        registerSpecialKeySpaceModule(
            SpecialKeySpace::MODULE::CONFIGURATION,
            SpecialKeySpace::IMPLTYPE::READONLY,
            std::make_unique<ProcessClassSourceRangeImpl>(
                KeyRangeRef(LiteralStringRef("process/class_source/"), LiteralStringRef("process/class_source0"))
                    .withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::CONFIGURATION).begin)));
        registerSpecialKeySpaceModule(
            SpecialKeySpace::MODULE::MANAGEMENT,
            SpecialKeySpace::IMPLTYPE::READWRITE,
            std::make_unique<LockDatabaseImpl>(
                singleKeyRange(LiteralStringRef("db_locked"))
                    .withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin)));
        registerSpecialKeySpaceModule(
            SpecialKeySpace::MODULE::MANAGEMENT,
            SpecialKeySpace::IMPLTYPE::READWRITE,
            std::make_unique<ConsistencyCheckImpl>(
                singleKeyRange(LiteralStringRef("consistency_check_suspended"))
                    .withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin)));
        registerSpecialKeySpaceModule(
            SpecialKeySpace::MODULE::GLOBALCONFIG,
            SpecialKeySpace::IMPLTYPE::READWRITE,
            std::make_unique<GlobalConfigImpl>(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::GLOBALCONFIG)));
        registerSpecialKeySpaceModule(
            SpecialKeySpace::MODULE::TRACING,
            SpecialKeySpace::IMPLTYPE::READWRITE,
            std::make_unique<TracingOptionsImpl>(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::TRACING)));
        registerSpecialKeySpaceModule(
            SpecialKeySpace::MODULE::CONFIGURATION,
            SpecialKeySpace::IMPLTYPE::READWRITE,
            std::make_unique<CoordinatorsImpl>(
                KeyRangeRef(LiteralStringRef("coordinators/"), LiteralStringRef("coordinators0"))
                    .withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::CONFIGURATION).begin)));
        registerSpecialKeySpaceModule(
            SpecialKeySpace::MODULE::MANAGEMENT,
            SpecialKeySpace::IMPLTYPE::READONLY,
            std::make_unique<CoordinatorsAutoImpl>(
                singleKeyRange(LiteralStringRef("auto_coordinators"))
                    .withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin)));
        registerSpecialKeySpaceModule(
            SpecialKeySpace::MODULE::MANAGEMENT,
            SpecialKeySpace::IMPLTYPE::READWRITE,
            std::make_unique<AdvanceVersionImpl>(
                singleKeyRange(LiteralStringRef("min_required_commit_version"))
                    .withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin)));
        registerSpecialKeySpaceModule(
            SpecialKeySpace::MODULE::MANAGEMENT,
            SpecialKeySpace::IMPLTYPE::READWRITE,
            std::make_unique<ClientProfilingImpl>(
                KeyRangeRef(LiteralStringRef("profiling/"), LiteralStringRef("profiling0"))
                    .withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin)));
        registerSpecialKeySpaceModule(
            SpecialKeySpace::MODULE::MANAGEMENT, SpecialKeySpace::IMPLTYPE::READWRITE,
            std::make_unique<MaintenanceImpl>(
                KeyRangeRef(LiteralStringRef("maintenance/"), LiteralStringRef("maintenance0"))
                    .withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin)));
        registerSpecialKeySpaceModule(
            SpecialKeySpace::MODULE::MANAGEMENT, SpecialKeySpace::IMPLTYPE::READWRITE,
            std::make_unique<DataDistributionImpl>(
                KeyRangeRef(LiteralStringRef("data_distribution/"), LiteralStringRef("data_distribution0"))
                    .withPrefix(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::MANAGEMENT).begin)));
        registerSpecialKeySpaceModule(
            SpecialKeySpace::MODULE::ACTORLINEAGE,
            SpecialKeySpace::IMPLTYPE::READONLY,
            std::make_unique<ActorLineageImpl>(SpecialKeySpace::getModuleRange(SpecialKeySpace::MODULE::ACTORLINEAGE)));
        registerSpecialKeySpaceModule(SpecialKeySpace::MODULE::ACTOR_PROFILER_CONF,
                                      SpecialKeySpace::IMPLTYPE::READWRITE,
                                      std::make_unique<ActorProfilerConf>(SpecialKeySpace::getModuleRange(
                                          SpecialKeySpace::MODULE::ACTOR_PROFILER_CONF)));
    }
    if (apiVersionAtLeast(630)) {
        registerSpecialKeySpaceModule(SpecialKeySpace::MODULE::TRANSACTION,
                                      SpecialKeySpace::IMPLTYPE::READONLY,
                                      std::make_unique<ConflictingKeysImpl>(conflictingKeysRange));
        registerSpecialKeySpaceModule(SpecialKeySpace::MODULE::TRANSACTION,
                                      SpecialKeySpace::IMPLTYPE::READONLY,
                                      std::make_unique<ReadConflictRangeImpl>(readConflictRangeKeysRange));
        registerSpecialKeySpaceModule(SpecialKeySpace::MODULE::TRANSACTION,
                                      SpecialKeySpace::IMPLTYPE::READONLY,
                                      std::make_unique<WriteConflictRangeImpl>(writeConflictRangeKeysRange));
        registerSpecialKeySpaceModule(SpecialKeySpace::MODULE::METRICS,
                                      SpecialKeySpace::IMPLTYPE::READONLY,
                                      std::make_unique<DDStatsRangeImpl>(ddStatsRange));
        registerSpecialKeySpaceModule(
            SpecialKeySpace::MODULE::METRICS,
            SpecialKeySpace::IMPLTYPE::READONLY,
            std::make_unique<HealthMetricsRangeImpl>(KeyRangeRef(LiteralStringRef("\xff\xff/metrics/health/"),
                                                                 LiteralStringRef("\xff\xff/metrics/health0"))));
        registerSpecialKeySpaceModule(
            SpecialKeySpace::MODULE::WORKERINTERFACE,
            SpecialKeySpace::IMPLTYPE::READONLY,
            std::make_unique<WorkerInterfacesSpecialKeyImpl>(KeyRangeRef(
                LiteralStringRef("\xff\xff/worker_interfaces/"), LiteralStringRef("\xff\xff/worker_interfaces0"))));
        registerSpecialKeySpaceModule(
            SpecialKeySpace::MODULE::STATUSJSON,
            SpecialKeySpace::IMPLTYPE::READONLY,
            std::make_unique<SingleSpecialKeyImpl>(LiteralStringRef("\xff\xff/status/json"),
                                                   [](ReadYourWritesTransaction* ryw) -> Future<Optional<Value>> {
                                                       if (ryw->getDatabase().getPtr() &&
                                                           ryw->getDatabase()->getConnectionFile()) {
                                                           ++ryw->getDatabase()->transactionStatusRequests;
                                                           return getJSON(ryw->getDatabase());
                                                       } else {
                                                           return Optional<Value>();
                                                       }
                                                   }));
        registerSpecialKeySpaceModule(
            SpecialKeySpace::MODULE::CLUSTERFILEPATH,
            SpecialKeySpace::IMPLTYPE::READONLY,
            std::make_unique<SingleSpecialKeyImpl>(
                LiteralStringRef("\xff\xff/cluster_file_path"),
                [](ReadYourWritesTransaction* ryw) -> Future<Optional<Value>> {
                    try {
                        if (ryw->getDatabase().getPtr() && ryw->getDatabase()->getConnectionFile()) {
                            Optional<Value> output = StringRef(ryw->getDatabase()->getConnectionFile()->getFilename());
                            return output;
                        }
                    } catch (Error& e) {
                        return e;
                    }
                    return Optional<Value>();
                }));

        registerSpecialKeySpaceModule(
            SpecialKeySpace::MODULE::CONNECTIONSTRING,
            SpecialKeySpace::IMPLTYPE::READONLY,
            std::make_unique<SingleSpecialKeyImpl>(
                LiteralStringRef("\xff\xff/connection_string"),
                [](ReadYourWritesTransaction* ryw) -> Future<Optional<Value>> {
                    try {
                        if (ryw->getDatabase().getPtr() && ryw->getDatabase()->getConnectionFile()) {
                            Reference<ClusterConnectionFile> f = ryw->getDatabase()->getConnectionFile();
                            Optional<Value> output = StringRef(f->getConnectionString().toString());
                            return output;
                        }
                    } catch (Error& e) {
                        return e;
                    }
                    return Optional<Value>();
                }));
    }
    throttleExpirer = recurring([this]() { expireThrottles(); }, CLIENT_KNOBS->TAG_THROTTLE_EXPIRATION_INTERVAL);

    if (BUGGIFY) {
        DatabaseContext::debugUseTags = true;
    }
}

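// Constructs a DatabaseContext in an error state: `err` is stored as deferredError so
// that operations attempted on this database report the error, while the transaction
// metric counters are still initialized so the object is safe to inspect.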
DatabaseContext::DatabaseContext(const Error& err)
  : deferredError(err), cc("TransactionMetrics"), transactionReadVersions("ReadVersions", cc),
    transactionReadVersionsThrottled("ReadVersionsThrottled", cc),
    transactionReadVersionsCompleted("ReadVersionsCompleted", cc),
    transactionReadVersionBatches("ReadVersionBatches", cc),
    transactionBatchReadVersions("BatchPriorityReadVersions", cc),
    transactionDefaultReadVersions("DefaultPriorityReadVersions", cc),
    transactionImmediateReadVersions("ImmediatePriorityReadVersions", cc),
    transactionBatchReadVersionsCompleted("BatchPriorityReadVersionsCompleted", cc),
    transactionDefaultReadVersionsCompleted("DefaultPriorityReadVersionsCompleted", cc),
    transactionImmediateReadVersionsCompleted("ImmediatePriorityReadVersionsCompleted", cc),
    transactionLogicalReads("LogicalUncachedReads", cc), transactionPhysicalReads("PhysicalReadRequests", cc),
    transactionPhysicalReadsCompleted("PhysicalReadRequestsCompleted", cc),
    transactionGetKeyRequests("GetKeyRequests", cc), transactionGetValueRequests("GetValueRequests", cc),
    transactionGetRangeRequests("GetRangeRequests", cc), transactionWatchRequests("WatchRequests", cc),
    transactionGetAddressesForKeyRequests("GetAddressesForKeyRequests", cc), transactionBytesRead("BytesRead", cc),
    transactionKeysRead("KeysRead", cc), transactionMetadataVersionReads("MetadataVersionReads", cc),
    transactionCommittedMutations("CommittedMutations", cc),
    transactionCommittedMutationBytes("CommittedMutationBytes", cc), transactionSetMutations("SetMutations", cc),
    transactionClearMutations("ClearMutations", cc), transactionAtomicMutations("AtomicMutations", cc),
    transactionsCommitStarted("CommitStarted", cc), transactionsCommitCompleted("CommitCompleted", cc),
    transactionKeyServerLocationRequests("KeyServerLocationRequests", cc),
    transactionKeyServerLocationRequestsCompleted("KeyServerLocationRequestsCompleted", cc),
    transactionStatusRequests("StatusRequests", cc), transactionsTooOld("TooOld", cc),
    transactionsFutureVersions("FutureVersions", cc), transactionsNotCommitted("NotCommitted", cc),
    transactionsMaybeCommitted("MaybeCommitted", cc), transactionsResourceConstrained("ResourceConstrained", cc),
    transactionsThrottled("Throttled", cc), transactionsProcessBehind("ProcessBehind", cc), latencies(1000),
    readLatencies(1000), commitLatencies(1000), GRVLatencies(1000), mutationsPerCommit(1000), bytesPerCommit(1000),
    smoothMidShardSize(CLIENT_KNOBS->SHARD_STAT_SMOOTH_AMOUNT),
    transactionsExpensiveClearCostEstCount("ExpensiveClearCostEstCount", cc), internal(false),
    transactionTracingEnabled(true) {}

// Static constructor used by server processes to create a DatabaseContext
// For internal (fdbserver) use only
Database DatabaseContext::create(Reference<AsyncVar<ClientDBInfo>> clientInfo,
                                 Future<Void> clientInfoMonitor,
                                 LocalityData clientLocality,
                                 bool enableLocalityLoadBalance,
                                 TaskPriority taskID,
                                 bool lockAware,
                                 int apiVersion,
                                 bool switchable) {
    return Database(new DatabaseContext(Reference<AsyncVar<Reference<ClusterConnectionFile>>>(),
                                        clientInfo,
                                        makeReference<AsyncVar<Optional<ClientLeaderRegInterface>>>(),
                                        clientInfoMonitor,
                                        taskID,
                                        clientLocality,
                                        enableLocalityLoadBalance,
                                        lockAware,
                                        true,
                                        apiVersion,
                                        switchable));
}

DatabaseContext::~DatabaseContext() {
    cacheListMonitor.cancel();
    monitorProxiesInfoChange.cancel();
    for (auto it = server_interf.begin(); it != server_interf.end(); it = server_interf.erase(it))
        it->second->notifyContextDestroyed();
    ASSERT_ABORT(server_interf.empty());
    locationCache.insert(allKeys, Reference<LocationInfo>());
}

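// Returns the cached shard (key range and storage server location) containing `key`.
// If isBackward is set, returns the shard containing the key immediately before
// `key` instead. The returned LocationInfo is null when nothing is cached.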
pair<KeyRange, Reference<LocationInfo>> DatabaseContext::getCachedLocation(const KeyRef& key, bool isBackward) {
    if (isBackward) {
        auto range = locationCache.rangeContainingKeyBefore(key);
        return std::make_pair(range->range(), range->value());
    } else {
        auto range = locationCache.rangeContaining(key);
        return std::make_pair(range->range(), range->value());
    }
}

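// Fills `result` with up to `limit` cached locations covering `range`, walking the
// cache from the far end when `reverse` is set. Returns false and clears `result` if
// any part of the range is missing from the cache, so callers either get a fully
// cached answer or fall back to asking the proxies.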
bool DatabaseContext::getCachedLocations(const KeyRangeRef& range,
                                         vector<std::pair<KeyRange, Reference<LocationInfo>>>& result,
                                         int limit,
                                         bool reverse) {
    result.clear();

    auto begin = locationCache.rangeContaining(range.begin);
    auto end = locationCache.rangeContainingKeyBefore(range.end);

    loop {
        auto r = reverse ? end : begin;
        if (!r->value()) {
            TEST(result.size()); // had some but not all cached locations
            result.clear();
            return false;
        }
        result.emplace_back(r->range() & range, r->value());
        if (result.size() == limit || begin == end) {
            break;
        }

        if (reverse)
            --end;
        else
            ++begin;
    }

    return true;
}

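// Caches `keys` -> `servers` in the location cache. If the cache has grown past
// locationCacheSize, up to maxEvictionAttempts random ranges are evicted first to
// make room. Returns the LocationInfo that was inserted.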
Reference<LocationInfo> DatabaseContext::setCachedLocation(const KeyRangeRef& keys,
                                                           const vector<StorageServerInterface>& servers) {
    vector<Reference<ReferencedInterface<StorageServerInterface>>> serverRefs;
    serverRefs.reserve(servers.size());
    for (const auto& interf : servers) {
        serverRefs.push_back(StorageServerInfo::getInterface(this, interf, clientLocality));
    }

    int maxEvictionAttempts = 100, attempts = 0;
    auto loc = makeReference<LocationInfo>(serverRefs);
    while (locationCache.size() > locationCacheSize && attempts < maxEvictionAttempts) {
        TEST(true); // NativeAPI storage server locationCache entry evicted
        attempts++;
        auto r = locationCache.randomRange();
        Key begin = r.begin(), end = r.end(); // insert invalidates r, so can't be passed a mere reference into it
        locationCache.insert(KeyRangeRef(begin, end), Reference<LocationInfo>());
    }
    locationCache.insert(keys, loc);
    return loc;
}

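// Drops cached locations so the next request for these keys refetches them from the
// proxies; used after requests fail in ways that suggest the shard has moved.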
void DatabaseContext::invalidateCache(const KeyRef& key, bool isBackward) {
    if (isBackward) {
        locationCache.rangeContainingKeyBefore(key)->value() = Reference<LocationInfo>();
    } else {
        locationCache.rangeContaining(key)->value() = Reference<LocationInfo>();
    }
}

void DatabaseContext::invalidateCache(const KeyRangeRef& keys) {
    auto rs = locationCache.intersectingRanges(keys);
    Key begin = rs.begin().begin(),
        end = rs.end().begin(); // insert invalidates rs, so can't be passed a mere reference into it
    locationCache.insert(KeyRangeRef(begin, end), Reference<LocationInfo>());
}

Future<Void> DatabaseContext::onProxiesChanged() {
    return this->proxiesChangeTrigger.onTrigger();
}

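// Tag sampling decisions: the sample rate and cost threshold come from the
// cluster-wide global configuration, falling back to the READ_TAG_SAMPLE_RATE and
// COMMIT_SAMPLE_COST knobs when unset.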
bool DatabaseContext::sampleReadTags() const {
    double sampleRate = GlobalConfig::globalConfig().get(transactionTagSampleRate, CLIENT_KNOBS->READ_TAG_SAMPLE_RATE);
    return sampleRate > 0 && deterministicRandom()->random01() <= sampleRate;
}

bool DatabaseContext::sampleOnCost(uint64_t cost) const {
    double sampleCost =
        GlobalConfig::globalConfig().get<double>(transactionTagSampleCost, CLIENT_KNOBS->COMMIT_SAMPLE_COST);
    if (sampleCost <= 0)
        return false;
    return deterministicRandom()->random01() <= (double)cost / sampleCost;
}

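// Integer option values are passed as raw 8-byte integers; anything of the wrong
// size or outside [minValue, maxValue] is rejected with invalid_option_value.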
int64_t extractIntOption(Optional<StringRef> value, int64_t minValue, int64_t maxValue) {
    validateOptionValue(value, true);
    if (value.get().size() != 8) {
        throw invalid_option_value();
    }

    int64_t passed = *((int64_t*)(value.get().begin()));
    if (passed > maxValue || passed < minValue) {
        throw invalid_option_value();
    }

    return passed;
}

uint64_t extractHexOption(StringRef value) {
    char* end;
    uint64_t id = strtoull(value.toString().c_str(), &end, 16);
    if (*end)
        throw invalid_option_value();
    return id;
}

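// Database options whose optionInfo names a transaction-level default are recorded
// as defaults applied to every transaction created on this database; the remaining
// options mutate the DatabaseContext directly.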
void DatabaseContext::setOption(FDBDatabaseOptions::Option option, Optional<StringRef> value) {
    int defaultFor = FDBDatabaseOptions::optionInfo.getMustExist(option).defaultFor;
    if (defaultFor >= 0) {
        ASSERT(FDBTransactionOptions::optionInfo.find((FDBTransactionOptions::Option)defaultFor) !=
               FDBTransactionOptions::optionInfo.end());
        transactionDefaults.addOption((FDBTransactionOptions::Option)defaultFor, value.castTo<Standalone<StringRef>>());
    } else {
        switch (option) {
        case FDBDatabaseOptions::LOCATION_CACHE_SIZE:
            locationCacheSize = (int)extractIntOption(value, 0, std::numeric_limits<int>::max());
            break;
        case FDBDatabaseOptions::MACHINE_ID:
            clientLocality =
                LocalityData(clientLocality.processId(),
                             value.present() ? Standalone<StringRef>(value.get()) : Optional<Standalone<StringRef>>(),
                             clientLocality.machineId(),
                             clientLocality.dcId());
            if (clientInfo->get().commitProxies.size())
                commitProxies = makeReference<CommitProxyInfo>(clientInfo->get().commitProxies, false);
            if (clientInfo->get().grvProxies.size())
                grvProxies = makeReference<GrvProxyInfo>(clientInfo->get().grvProxies, true);
            server_interf.clear();
            locationCache.insert(allKeys, Reference<LocationInfo>());
            break;
        case FDBDatabaseOptions::MAX_WATCHES:
            maxOutstandingWatches = (int)extractIntOption(value, 0, CLIENT_KNOBS->ABSOLUTE_MAX_WATCHES);
            break;
        case FDBDatabaseOptions::DATACENTER_ID:
            clientLocality =
                LocalityData(clientLocality.processId(),
                             clientLocality.zoneId(),
                             clientLocality.machineId(),
                             value.present() ? Standalone<StringRef>(value.get()) : Optional<Standalone<StringRef>>());
            if (clientInfo->get().commitProxies.size())
                commitProxies = makeReference<CommitProxyInfo>(clientInfo->get().commitProxies, false);
            if (clientInfo->get().grvProxies.size())
                grvProxies = makeReference<GrvProxyInfo>(clientInfo->get().grvProxies, true);
            server_interf.clear();
            locationCache.insert(allKeys, Reference<LocationInfo>());
            break;
        case FDBDatabaseOptions::SNAPSHOT_RYW_ENABLE:
            validateOptionValue(value, false);
            snapshotRywEnabled++;
            break;
        case FDBDatabaseOptions::SNAPSHOT_RYW_DISABLE:
            validateOptionValue(value, false);
            snapshotRywEnabled--;
            break;
        case FDBDatabaseOptions::DISTRIBUTED_TRANSACTION_TRACE_ENABLE:
            validateOptionValue(value, false);
            transactionTracingEnabled++;
            break;
        case FDBDatabaseOptions::DISTRIBUTED_TRANSACTION_TRACE_DISABLE:
            validateOptionValue(value, false);
            transactionTracingEnabled--;
            break;
        default:
            break;
        }
    }
}

void DatabaseContext::addWatch() {
    if (outstandingWatches >= maxOutstandingWatches)
        throw too_many_watches();

    ++outstandingWatches;
}

void DatabaseContext::removeWatch() {
    --outstandingWatches;
    ASSERT(outstandingWatches >= 0);
}

Future<Void> DatabaseContext::onConnected() {
    return connected;
}

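// Switches this DatabaseContext to the cluster described by `connFile`: clears proxy
// and location state left over from the old cluster, publishes the new connection
// file, and then loops on read-lock-aware GRV attempts until the new cluster
// responds, at which point connectionFileChanged() listeners are notified.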
ACTOR static Future<Void> switchConnectionFileImpl(Reference<ClusterConnectionFile> connFile, DatabaseContext* self) {
    TEST(true); // Switch connection file
    TraceEvent("SwitchConnectionFile")
        .detail("ConnectionFile", connFile->canGetFilename() ? connFile->getFilename() : "")
        .detail("ConnectionString", connFile->getConnectionString().toString());

    // Reset state from former cluster.
    self->commitProxies.clear();
    self->grvProxies.clear();
    self->minAcceptableReadVersion = std::numeric_limits<Version>::max();
    self->invalidateCache(allKeys);

    auto clearedClientInfo = self->clientInfo->get();
    clearedClientInfo.commitProxies.clear();
    clearedClientInfo.grvProxies.clear();
    clearedClientInfo.id = deterministicRandom()->randomUniqueID();
    self->clientInfo->set(clearedClientInfo);
    self->connectionFile->set(connFile);

    state Database db(Reference<DatabaseContext>::addRef(self));
    state Transaction tr(db);
    loop {
        tr.setOption(FDBTransactionOptions::READ_LOCK_AWARE);
        try {
            TraceEvent("SwitchConnectionFileAttemptingGRV");
            Version v = wait(tr.getReadVersion());
            TraceEvent("SwitchConnectionFileGotRV")
                .detail("ReadVersion", v)
                .detail("MinAcceptableReadVersion", self->minAcceptableReadVersion);
            ASSERT(self->minAcceptableReadVersion != std::numeric_limits<Version>::max());
            self->connectionFileChangedTrigger.trigger();
            return Void();
        } catch (Error& e) {
            TraceEvent("SwitchConnectionFileError").detail("Error", e.what());
            wait(tr.onError(e));
        }
    }
}

Reference<ClusterConnectionFile> DatabaseContext::getConnectionFile() {
    if (connectionFile) {
        return connectionFile->get();
    }
    return Reference<ClusterConnectionFile>();
}

Future<Void> DatabaseContext::switchConnectionFile(Reference<ClusterConnectionFile> standby) {
    ASSERT(switchable);
    return switchConnectionFileImpl(standby, this);
}

Future<Void> DatabaseContext::connectionFileChanged() {
    return connectionFileChangedTrigger.onTrigger();
}

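// Drops client-side tag throttles whose expiration time has passed; scheduled by the
// constructor to run every TAG_THROTTLE_EXPIRATION_INTERVAL seconds.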
void DatabaseContext::expireThrottles() {
    for (auto& priorityItr : throttledTags) {
        for (auto tagItr = priorityItr.second.begin(); tagItr != priorityItr.second.end();) {
            if (tagItr->second.expired()) {
                TEST(true); // Expiring client throttle
                tagItr = priorityItr.second.erase(tagItr);
            } else {
                ++tagItr;
            }
        }
    }
}

extern IPAddress determinePublicIPAutomatically(ClusterConnectionString const& ccs);

// Creates a database object that represents a connection to a cluster
// This constructor uses a preallocated DatabaseContext that may have been created
// on another thread
Database Database::createDatabase(Reference<ClusterConnectionFile> connFile,
                                  int apiVersion,
                                  bool internal,
                                  LocalityData const& clientLocality,
                                  DatabaseContext* preallocatedDb) {
    if (!g_network)
        throw network_not_setup();

    if (connFile) {
        if (networkOptions.traceDirectory.present() && !traceFileIsOpen()) {
            g_network->initMetrics();
            FlowTransport::transport().initMetrics();
            initTraceEventMetrics();

            auto publicIP = determinePublicIPAutomatically(connFile->getConnectionString());
            selectTraceFormatter(networkOptions.traceFormat);
            selectTraceClockSource(networkOptions.traceClockSource);
            openTraceFile(NetworkAddress(publicIP, ::getpid()),
                          networkOptions.traceRollSize,
                          networkOptions.traceMaxLogsSize,
                          networkOptions.traceDirectory.get(),
                          "trace",
                          networkOptions.traceLogGroup,
                          networkOptions.traceFileIdentifier);

            TraceEvent("ClientStart")
                .detail("SourceVersion", getSourceVersion())
                .detail("Version", FDB_VT_VERSION)
                .detail("PackageName", FDB_VT_PACKAGE_NAME)
                .detail("ClusterFile", connFile->getFilename().c_str())
                .detail("ConnectionString", connFile->getConnectionString().toString())
                .detailf("ActualTime", "%lld", DEBUG_DETERMINISM ? 0 : time(nullptr))
                .detail("ApiVersion", apiVersion)
                .detailf("ImageOffset", "%p", platform::getImageOffset())
                .trackLatest("ClientStart");

            initializeSystemMonitorMachineState(SystemMonitorMachineState(IPAddress(publicIP)));

            systemMonitor();
            uncancellable(recurring(&systemMonitor, CLIENT_KNOBS->SYSTEM_MONITOR_INTERVAL, TaskPriority::FlushTrace));
        }
    }

    g_network->initTLS();

    auto clientInfo = makeReference<AsyncVar<ClientDBInfo>>();
    auto coordinator = makeReference<AsyncVar<Optional<ClientLeaderRegInterface>>>();
    auto connectionFile = makeReference<AsyncVar<Reference<ClusterConnectionFile>>>();
    connectionFile->set(connFile);
    Future<Void> clientInfoMonitor = monitorProxies(connectionFile,
                                                    clientInfo,
                                                    coordinator,
                                                    networkOptions.supportedVersions,
                                                    StringRef(networkOptions.traceLogGroup));

    DatabaseContext* db;
    if (preallocatedDb) {
        db = new (preallocatedDb) DatabaseContext(connectionFile,
                                                  clientInfo,
                                                  coordinator,
                                                  clientInfoMonitor,
                                                  TaskPriority::DefaultEndpoint,
                                                  clientLocality,
                                                  true,
                                                  false,
                                                  internal,
                                                  apiVersion,
                                                  /*switchable*/ true);
    } else {
        db = new DatabaseContext(connectionFile,
                                 clientInfo,
                                 coordinator,
                                 clientInfoMonitor,
                                 TaskPriority::DefaultEndpoint,
                                 clientLocality,
                                 true,
                                 false,
                                 internal,
                                 apiVersion,
                                 /*switchable*/ true);
    }

    return Database(db);
}

Database Database::createDatabase(std::string connFileName,
                                  int apiVersion,
                                  bool internal,
                                  LocalityData const& clientLocality) {
    Reference<ClusterConnectionFile> rccf = Reference<ClusterConnectionFile>(
        new ClusterConnectionFile(ClusterConnectionFile::lookupClusterFileName(connFileName).first));
    return Database::createDatabase(rccf, apiVersion, internal, clientLocality);
}

Reference<WatchMetadata> DatabaseContext::getWatchMetadata(KeyRef key) const {
    const auto it = watchMap.find(key);
    if (it == watchMap.end())
        return Reference<WatchMetadata>();
    return it->second;
}

KeyRef DatabaseContext::setWatchMetadata(Reference<WatchMetadata> metadata) {
    KeyRef keyRef = metadata->key.contents();
    watchMap[keyRef] = metadata;
    return keyRef;
}

void DatabaseContext::deleteWatchMetadata(KeyRef key) {
    watchMap.erase(key);
}

void DatabaseContext::clearWatchMetadata() {
    watchMap.clear();
}

WatchMetadata::WatchMetadata(Key key, Optional<Value> value, Version version, TransactionInfo info, TagSet tags)
  : key(key), value(value), version(version), info(info), tags(tags) {
    // create dummy future
    watchFuture = watchPromise.getFuture();
}

const UniqueOrderedOptionList<FDBTransactionOptions>& Database::getTransactionDefaults() const {
    ASSERT(db);
    return db->transactionDefaults;
}

void setNetworkOption(FDBNetworkOptions::Option option, Optional<StringRef> value) {
    std::regex identifierRegex("^[a-zA-Z0-9_]*$");
    switch (option) {
    // SOMEDAY: If the network is already started, should these five throw an error?
    case FDBNetworkOptions::TRACE_ENABLE:
        networkOptions.traceDirectory = value.present() ? value.get().toString() : "";
        break;
    case FDBNetworkOptions::TRACE_ROLL_SIZE:
        validateOptionValue(value, true);
        networkOptions.traceRollSize = extractIntOption(value, 0, std::numeric_limits<int64_t>::max());
        break;
    case FDBNetworkOptions::TRACE_MAX_LOGS_SIZE:
        validateOptionValue(value, true);
        networkOptions.traceMaxLogsSize = extractIntOption(value, 0, std::numeric_limits<int64_t>::max());
        break;
    case FDBNetworkOptions::TRACE_FORMAT:
        validateOptionValue(value, true);
        networkOptions.traceFormat = value.get().toString();
        if (!validateTraceFormat(networkOptions.traceFormat)) {
            fprintf(stderr, "Unrecognized trace format: `%s'\n", networkOptions.traceFormat.c_str());
            throw invalid_option_value();
        }
        break;
    case FDBNetworkOptions::TRACE_FILE_IDENTIFIER:
        validateOptionValue(value, true);
        networkOptions.traceFileIdentifier = value.get().toString();
        if (networkOptions.traceFileIdentifier.length() > CLIENT_KNOBS->TRACE_LOG_FILE_IDENTIFIER_MAX_LENGTH) {
            fprintf(stderr, "Trace file identifier provided is too long.\n");
            throw invalid_option_value();
        } else if (!std::regex_match(networkOptions.traceFileIdentifier, identifierRegex)) {
            fprintf(stderr, "Trace file identifier should only contain alphanumerics and underscores.\n");
            throw invalid_option_value();
        }
        break;

    case FDBNetworkOptions::TRACE_LOG_GROUP:
        if (value.present()) {
            if (traceFileIsOpen()) {
                setTraceLogGroup(value.get().toString());
            } else {
                networkOptions.traceLogGroup = value.get().toString();
            }
        }
        break;
    case FDBNetworkOptions::TRACE_CLOCK_SOURCE:
        validateOptionValue(value, true);
        networkOptions.traceClockSource = value.get().toString();
        if (!validateTraceClockSource(networkOptions.traceClockSource)) {
            fprintf(stderr, "Unrecognized trace clock source: `%s'\n", networkOptions.traceClockSource.c_str());
            throw invalid_option_value();
        }
        break;
    case FDBNetworkOptions::KNOB: {
        validateOptionValue(value, true);

        std::string optionValue = value.get().toString();
        TraceEvent("SetKnob").detail("KnobString", optionValue);

        size_t eq = optionValue.find_first_of('=');
        if (eq == optionValue.npos) {
            TraceEvent(SevWarnAlways, "InvalidKnobString").detail("KnobString", optionValue);
            throw invalid_option_value();
        }

        std::string knobName = optionValue.substr(0, eq);
        std::string knobValue = optionValue.substr(eq + 1);
        if (globalFlowKnobs->setKnob(knobName, knobValue)) {
            // update dependent knobs
            globalFlowKnobs->initialize();
        } else if (globalClientKnobs->setKnob(knobName, knobValue)) {
            // update dependent knobs
            globalClientKnobs->initialize();
        } else {
            TraceEvent(SevWarnAlways, "UnrecognizedKnob").detail("Knob", knobName.c_str());
            fprintf(stderr, "FoundationDB client ignoring unrecognized knob option '%s'\n", knobName.c_str());
        }
        break;
    }
    case FDBNetworkOptions::TLS_PLUGIN:
        validateOptionValue(value, true);
        break;
    case FDBNetworkOptions::TLS_CERT_PATH:
        validateOptionValue(value, true);
        tlsConfig.setCertificatePath(value.get().toString());
        break;
    case FDBNetworkOptions::TLS_CERT_BYTES: {
        validateOptionValue(value, true);
        tlsConfig.setCertificateBytes(value.get().toString());
        break;
    }
    case FDBNetworkOptions::TLS_CA_PATH: {
        validateOptionValue(value, true);
        tlsConfig.setCAPath(value.get().toString());
        break;
    }
    case FDBNetworkOptions::TLS_CA_BYTES: {
        validateOptionValue(value, true);
        tlsConfig.setCABytes(value.get().toString());
        break;
    }
    case FDBNetworkOptions::TLS_PASSWORD:
        validateOptionValue(value, true);
        tlsConfig.setPassword(value.get().toString());
        break;
    case FDBNetworkOptions::TLS_KEY_PATH:
        validateOptionValue(value, true);
        tlsConfig.setKeyPath(value.get().toString());
        break;
    case FDBNetworkOptions::TLS_KEY_BYTES: {
        validateOptionValue(value, true);
        tlsConfig.setKeyBytes(value.get().toString());
        break;
    }
    case FDBNetworkOptions::TLS_VERIFY_PEERS:
        validateOptionValue(value, true);
        tlsConfig.clearVerifyPeers();
        tlsConfig.addVerifyPeers(value.get().toString());
        break;
    case FDBNetworkOptions::CLIENT_BUGGIFY_ENABLE:
        enableBuggify(true, BuggifyType::Client);
        break;
    case FDBNetworkOptions::CLIENT_BUGGIFY_DISABLE:
        enableBuggify(false, BuggifyType::Client);
        break;
    case FDBNetworkOptions::CLIENT_BUGGIFY_SECTION_ACTIVATED_PROBABILITY:
        validateOptionValue(value, true);
        clearBuggifySections(BuggifyType::Client);
        P_BUGGIFIED_SECTION_ACTIVATED[int(BuggifyType::Client)] = double(extractIntOption(value, 0, 100)) / 100.0;
        break;
    case FDBNetworkOptions::CLIENT_BUGGIFY_SECTION_FIRED_PROBABILITY:
        validateOptionValue(value, true);
        P_BUGGIFIED_SECTION_FIRES[int(BuggifyType::Client)] = double(extractIntOption(value, 0, 100)) / 100.0;
        break;
    case FDBNetworkOptions::DISABLE_CLIENT_STATISTICS_LOGGING:
        validateOptionValue(value, false);
        networkOptions.logClientInfo = false;
        break;
    case FDBNetworkOptions::SUPPORTED_CLIENT_VERSIONS: {
        // The multi-version API should be providing us these guarantees
        ASSERT(g_network);
        ASSERT(value.present());

        Standalone<VectorRef<ClientVersionRef>> supportedVersions;
        std::vector<StringRef> supportedVersionsStrings = value.get().splitAny(LiteralStringRef(";"));
        for (StringRef versionString : supportedVersionsStrings) {
            supportedVersions.push_back_deep(supportedVersions.arena(), ClientVersionRef(versionString));
        }

        ASSERT(supportedVersions.size() > 0);
        networkOptions.supportedVersions->set(supportedVersions);

        break;
    }
    case FDBNetworkOptions::ENABLE_RUN_LOOP_PROFILING: // Same as ENABLE_SLOW_TASK_PROFILING
        validateOptionValue(value, false);
        networkOptions.runLoopProfilingEnabled = true;
        break;
    case FDBNetworkOptions::DISTRIBUTED_CLIENT_TRACER: {
        validateOptionValue(value, true);
        std::string tracer = value.get().toString();
        if (tracer == "none" || tracer == "disabled") {
            openTracer(TracerType::DISABLED);
        } else if (tracer == "logfile" || tracer == "file" || tracer == "log_file") {
            openTracer(TracerType::LOG_FILE);
        } else if (tracer == "network_lossy") {
            openTracer(TracerType::NETWORK_LOSSY);
        } else {
            fprintf(stderr, "ERROR: Unknown or unsupported tracer: `%s'\n", tracer.c_str());
            throw invalid_option_value();
        }
        break;
    }
    default:
        break;
    }
}

// update the network busyness on a 1s cadence
ACTOR Future<Void> monitorNetworkBusyness() {
    state double prevTime = now();
    loop {
        wait(delay(CLIENT_KNOBS->NETWORK_BUSYNESS_MONITOR_INTERVAL, TaskPriority::FlushTrace));
        double elapsed = now() - prevTime; // get elapsed time from last execution
        prevTime = now();
        struct NetworkMetrics::PriorityStats& tracker = g_network->networkInfo.metrics.starvationTrackerNetworkBusyness;

        if (tracker.active) { // update metrics
            tracker.duration += now() - tracker.windowedTimer;
            tracker.maxDuration = std::max(tracker.maxDuration, now() - tracker.timer);
            tracker.windowedTimer = now();
        }

        g_network->networkInfo.metrics.networkBusyness =
            std::min(elapsed, tracker.duration) / elapsed; // average duration spent doing "work"

        tracker.duration = 0;
        tracker.maxDuration = 0;
    }
}

// Setup g_network and start monitoring for network busyness
void setupNetwork(uint64_t transportId, bool useMetrics) {
    if (g_network)
        throw network_already_setup();

    if (!networkOptions.logClientInfo.present())
        networkOptions.logClientInfo = true;

    TLS::DisableOpenSSLAtExitHandler();
    g_network = newNet2(tlsConfig, false, useMetrics || networkOptions.traceDirectory.present());
    g_network->addStopCallback(Net2FileSystem::stop);
    g_network->addStopCallback(TLS::DestroyOpenSSLGlobalState);
    FlowTransport::createInstance(true, transportId);
    Net2FileSystem::newFileSystem();

    uncancellable(monitorNetworkBusyness());
}

void runNetwork() {
    if (!g_network) {
        throw network_not_setup();
    }

    if (!g_network->checkRunnable()) {
        throw network_cannot_be_restarted();
    }

    if (networkOptions.traceDirectory.present() && networkOptions.runLoopProfilingEnabled) {
        setupRunLoopProfiler();
    }

    g_network->run();

    if (networkOptions.traceDirectory.present())
        systemMonitor();
}

void stopNetwork() {
    if (!g_network)
        throw network_not_setup();

    g_network->stop();
    closeTraceFile();
}

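// Refreshes the commit and GRV proxy lists when the client info published by the
// cluster changes; remembers whether the proxies are provisional so that requests
// can opt in or out of using them.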
void DatabaseContext::updateProxies() {
    if (proxiesLastChange == clientInfo->get().id)
        return;
    proxiesLastChange = clientInfo->get().id;
    commitProxies.clear();
    grvProxies.clear();
    bool commitProxyProvisional = false, grvProxyProvisional = false;
    if (clientInfo->get().commitProxies.size()) {
        commitProxies = makeReference<CommitProxyInfo>(clientInfo->get().commitProxies, false);
        commitProxyProvisional = clientInfo->get().commitProxies[0].provisional;
    }
    if (clientInfo->get().grvProxies.size()) {
        grvProxies = makeReference<GrvProxyInfo>(clientInfo->get().grvProxies, true);
        grvProxyProvisional = clientInfo->get().grvProxies[0].provisional;
    }
    if (clientInfo->get().commitProxies.size() && clientInfo->get().grvProxies.size()) {
        ASSERT(commitProxyProvisional == grvProxyProvisional);
        proxyProvisional = commitProxyProvisional;
    }
}

Reference<CommitProxyInfo> DatabaseContext::getCommitProxies(bool useProvisionalProxies) {
    updateProxies();
    if (proxyProvisional && !useProvisionalProxies) {
        return Reference<CommitProxyInfo>();
    }
    return commitProxies;
}

Reference<GrvProxyInfo> DatabaseContext::getGrvProxies(bool useProvisionalProxies) {
    updateProxies();
    if (proxyProvisional && !useProvisionalProxies) {
        return Reference<GrvProxyInfo>();
    }
    return grvProxies;
}

// Actor which will wait until the MultiInterface<CommitProxyInterface> returned by the DatabaseContext cx is not
// nullptr
ACTOR Future<Reference<CommitProxyInfo>> getCommitProxiesFuture(DatabaseContext* cx, bool useProvisionalProxies) {
    loop {
        Reference<CommitProxyInfo> commitProxies = cx->getCommitProxies(useProvisionalProxies);
        if (commitProxies)
            return commitProxies;
        wait(cx->onProxiesChanged());
    }
}

// Returns a future which will not be set until the CommitProxyInfo of this DatabaseContext is not nullptr
Future<Reference<CommitProxyInfo>> DatabaseContext::getCommitProxiesFuture(bool useProvisionalProxies) {
    return ::getCommitProxiesFuture(this, useProvisionalProxies);
}

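// Reduces the remaining row/byte budget by the data just received. Byte accounting
// mirrors reachedBy(): the serialized expectedSize() plus an assumed 8-byte overhead
// per pair in place of sizeof(KeyValueRef).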
void GetRangeLimits::decrement(VectorRef<KeyValueRef> const& data) {
    if (rows != GetRangeLimits::ROW_LIMIT_UNLIMITED) {
        ASSERT(data.size() <= rows);
        rows -= data.size();
    }

    minRows = std::max(0, minRows - data.size());

    if (bytes != GetRangeLimits::BYTE_LIMIT_UNLIMITED)
        bytes = std::max(0, bytes - (int)data.expectedSize() - (8 - (int)sizeof(KeyValueRef)) * data.size());
}

void GetRangeLimits::decrement(KeyValueRef const& data) {
    minRows = std::max(0, minRows - 1);
    if (rows != GetRangeLimits::ROW_LIMIT_UNLIMITED)
        rows--;
    if (bytes != GetRangeLimits::BYTE_LIMIT_UNLIMITED)
        bytes = std::max(0, bytes - (int)8 - (int)data.expectedSize());
}

// True if either the row or byte limit has been reached
bool GetRangeLimits::isReached() {
    return rows == 0 || (bytes == 0 && minRows == 0);
}

// True if data would cause the row or byte limit to be reached
bool GetRangeLimits::reachedBy(VectorRef<KeyValueRef> const& data) {
    return (rows != GetRangeLimits::ROW_LIMIT_UNLIMITED && data.size() >= rows) ||
           (bytes != GetRangeLimits::BYTE_LIMIT_UNLIMITED &&
            (int)data.expectedSize() + (8 - (int)sizeof(KeyValueRef)) * data.size() >= bytes &&
            data.size() >= minRows);
}

bool GetRangeLimits::hasByteLimit() {
    return bytes != GetRangeLimits::BYTE_LIMIT_UNLIMITED;
}

bool GetRangeLimits::hasRowLimit() {
    return rows != GetRangeLimits::ROW_LIMIT_UNLIMITED;
}

bool GetRangeLimits::hasSatisfiedMinRows() {
    return hasByteLimit() && minRows == 0;
}

AddressExclusion AddressExclusion::parse(StringRef const& key) {
    // Must not change: serialized to the database!
    auto parsedIp = IPAddress::parse(key.toString());
    if (parsedIp.present()) {
        return AddressExclusion(parsedIp.get());
    }

    // Not a whole machine, includes `port'.
    try {
        auto addr = NetworkAddress::parse(key.toString());
        if (addr.isTLS()) {
            TraceEvent(SevWarnAlways, "AddressExclusionParseError")
                .detail("String", key)
                .detail("Description", "Address exclusion string should not include `:tls' suffix.");
            return AddressExclusion();
        }
        return AddressExclusion(addr.ip, addr.port);
    } catch (Error&) {
        TraceEvent(SevWarnAlways, "AddressExclusionParseError").detail("String", key);
        return AddressExclusion();
    }
}

Future<RangeResult> getRange(Database const& cx,
                             Future<Version> const& fVersion,
                             KeySelector const& begin,
                             KeySelector const& end,
                             GetRangeLimits const& limits,
                             bool const& reverse,
                             TransactionInfo const& info,
                             TagSet const& tags);

ACTOR Future<Optional<Value>> getValue(Future<Version> version,
                                       Key key,
                                       Database cx,
                                       TransactionInfo info,
                                       Reference<TransactionLogInfo> trLogInfo,
                                       TagSet tags);

ACTOR Future<Optional<StorageServerInterface>> fetchServerInterface(Database cx,
                                                                    TransactionInfo info,
                                                                    UID id,
                                                                    TagSet tags,
                                                                    Future<Version> ver = latestVersion) {
    Optional<Value> val = wait(getValue(ver, serverListKeyFor(id), cx, info, Reference<TransactionLogInfo>(), tags));
    if (!val.present()) {
        // A storage server has been removed from serverList since we read keyServers
        return Optional<StorageServerInterface>();
    }

    return decodeServerListValue(val.get());
}

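// Resolves each id in `ids` to a StorageServerInterface via fetchServerInterface.
// Returns an empty Optional if any server has disappeared from the server list, so
// callers know the key-to-server mapping they read is stale.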
ACTOR Future<Optional<vector<StorageServerInterface>>> transactionalGetServerInterfaces(Future<Version> ver,
                                                                                        Database cx,
                                                                                        TransactionInfo info,
                                                                                        vector<UID> ids,
                                                                                        TagSet tags) {
    state vector<Future<Optional<StorageServerInterface>>> serverListEntries;
    serverListEntries.reserve(ids.size());
    for (int s = 0; s < ids.size(); s++) {
        serverListEntries.push_back(fetchServerInterface(cx, info, ids[s], tags, ver));
    }

    vector<Optional<StorageServerInterface>> serverListValues = wait(getAll(serverListEntries));
    vector<StorageServerInterface> serverInterfaces;
    for (int s = 0; s < serverListValues.size(); s++) {
        if (!serverListValues[s].present()) {
            // A storage server has been removed from ServerList since we read keyServers
            return Optional<vector<StorageServerInterface>>();
        }
        serverInterfaces.push_back(serverListValues[s].get());
    }
    return serverInterfaces;
}

// If isBackward == true, returns the shard containing the key before 'key' (an infinitely long, inexpressible key).
|
|
|
|
// Otherwise returns the shard containing key
|
|
|
|
ACTOR Future<pair<KeyRange, Reference<LocationInfo>>> getKeyLocation_internal(Database cx,
|
|
|
|
Key key,
|
2020-07-10 01:49:33 +08:00
|
|
|
TransactionInfo info,
|
|
|
|
bool isBackward = false) {
|
|
|
|
state Span span("NAPI:getKeyLocation"_loc, info.spanID);
|
2017-12-10 08:10:22 +08:00
|
|
|
if (isBackward) {
|
2021-03-11 02:06:03 +08:00
|
|
|
ASSERT(key != allKeys.begin && key <= allKeys.end);
|
2017-12-10 08:10:22 +08:00
|
|
|
} else {
|
2021-03-11 02:06:03 +08:00
|
|
|
ASSERT(key < allKeys.end);
|
2017-12-10 08:10:22 +08:00
|
|
|
}
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if (info.debugID.present())
|
2017-12-10 08:10:22 +08:00
|
|
|
g_traceBatch.addEvent("TransactionDebug", info.debugID.get().first(), "NativeAPI.getKeyLocation.Before");
|
2018-06-21 00:21:23 +08:00
|
|
|
|
2017-12-10 08:10:22 +08:00
|
|
|
loop {
|
2020-03-06 06:00:44 +08:00
|
|
|
++cx->transactionKeyServerLocationRequests;
|
2017-12-10 08:10:22 +08:00
|
|
|
choose {
|
2021-03-11 02:06:03 +08:00
|
|
|
when(wait(cx->onProxiesChanged())) {}
|
2020-07-08 00:06:13 +08:00
|
|
|
when(GetKeyServerLocationsReply rep = wait(basicLoadBalance(
|
2021-03-11 02:06:03 +08:00
|
|
|
cx->getCommitProxies(info.useProvisionalProxies),
|
|
|
|
&CommitProxyInterface::getKeyServersLocations,
|
2020-07-10 01:49:33 +08:00
|
|
|
GetKeyServerLocationsRequest(span.context, key, Optional<KeyRef>(), 100, isBackward, key.arena()),
|
2020-07-08 00:06:13 +08:00
|
|
|
TaskPriority::DefaultPromiseEndpoint))) {
|
2020-03-06 06:00:44 +08:00
|
|
|
++cx->transactionKeyServerLocationRequestsCompleted;
|
2021-03-11 02:06:03 +08:00
|
|
|
if (info.debugID.present())
|
|
|
|
g_traceBatch.addEvent(
|
|
|
|
"TransactionDebug", info.debugID.get().first(), "NativeAPI.getKeyLocation.After");
|
|
|
|
ASSERT(rep.results.size() == 1);
|
2017-08-09 01:03:04 +08:00
|
|
|
|
2017-12-16 12:13:44 +08:00
|
|
|
auto locationInfo = cx->setCachedLocation(rep.results[0].first, rep.results[0].second);
|
|
|
|
return std::make_pair(KeyRange(rep.results[0].first, rep.arena), locationInfo);
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}

template <class F>
Future<pair<KeyRange, Reference<LocationInfo>>> getKeyLocation(Database const& cx,
                                                               Key const& key,
                                                               F StorageServerInterface::*member,
                                                               TransactionInfo const& info,
                                                               bool isBackward = false) {
    // we first check whether this range is cached
    auto ssi = cx->getCachedLocation(key, isBackward);
    if (!ssi.second) {
        return getKeyLocation_internal(cx, key, info, isBackward);
    }

    for (int i = 0; i < ssi.second->size(); i++) {
        if (IFailureMonitor::failureMonitor().onlyEndpointFailed(ssi.second->get(i, member).getEndpoint())) {
            cx->invalidateCache(key);
            ssi.second.clear();
            return getKeyLocation_internal(cx, key, info, isBackward);
        }
    }

    return ssi;
}
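
// Editorial note (not from the original source): the member-pointer parameter selects which storage-server
// endpoint the failure monitor should check, so a cached location is only trusted when the specific endpoint
// the caller is about to use is healthy. A typical call looks like:
//
//   pair<KeyRange, Reference<LocationInfo>> ssi =
//       wait(getKeyLocation(cx, key, &StorageServerInterface::getValue, info));
//
// which mirrors how getValue() below resolves its location before issuing the read.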

ACTOR Future<vector<pair<KeyRange, Reference<LocationInfo>>>> getKeyRangeLocations_internal(Database cx,
                                                                                            KeyRange keys,
                                                                                            int limit,
                                                                                            bool reverse,
                                                                                            TransactionInfo info) {
    state Span span("NAPI:getKeyRangeLocations"_loc, info.spanID);
    if (info.debugID.present())
        g_traceBatch.addEvent("TransactionDebug", info.debugID.get().first(), "NativeAPI.getKeyLocations.Before");

    loop {
        ++cx->transactionKeyServerLocationRequests;
        choose {
            when(wait(cx->onProxiesChanged())) {}
            when(GetKeyServerLocationsReply _rep = wait(basicLoadBalance(
                     cx->getCommitProxies(info.useProvisionalProxies),
                     &CommitProxyInterface::getKeyServersLocations,
                     GetKeyServerLocationsRequest(span.context, keys.begin, keys.end, limit, reverse, keys.arena()),
                     TaskPriority::DefaultPromiseEndpoint))) {
                ++cx->transactionKeyServerLocationRequestsCompleted;
                state GetKeyServerLocationsReply rep = _rep;
                if (info.debugID.present())
                    g_traceBatch.addEvent(
                        "TransactionDebug", info.debugID.get().first(), "NativeAPI.getKeyLocations.After");
                ASSERT(rep.results.size());

                state vector<pair<KeyRange, Reference<LocationInfo>>> results;
                state int shard = 0;
                for (; shard < rep.results.size(); shard++) {
                    // FIXME: these shards are being inserted into the map sequentially, it would be much more CPU
                    // efficient to save the map pairs and insert them all at once.
                    results.emplace_back(rep.results[shard].first & keys,
                                         cx->setCachedLocation(rep.results[shard].first, rep.results[shard].second));
                    wait(yield());
                }

                return results;
            }
        }
    }
}

// Get the SS locations for each shard in the 'keys' key-range;
// Returned vector size is the number of shards in the input keys key-range.
// Each returned vector element is a <ShardRange, storage server location info> pair, where
// ShardRange is the whole shard key-range, not a part of the given key range.
// Example: If the function is queried with key range (b, d), the returned list of pairs could be something like:
// [([a, b1), locationInfo), ([b1, c), locationInfo), ([c, d1), locationInfo)].
template <class F>
Future<vector<pair<KeyRange, Reference<LocationInfo>>>> getKeyRangeLocations(Database const& cx,
                                                                             KeyRange const& keys,
                                                                             int limit,
                                                                             bool reverse,
                                                                             F StorageServerInterface::*member,
                                                                             TransactionInfo const& info) {
    ASSERT(!keys.empty());

    vector<pair<KeyRange, Reference<LocationInfo>>> locations;
    if (!cx->getCachedLocations(keys, locations, limit, reverse)) {
        return getKeyRangeLocations_internal(cx, keys, limit, reverse, info);
    }

    bool foundFailed = false;
    for (const auto& [range, locInfo] : locations) {
        bool onlyEndpointFailed = false;
        for (int i = 0; i < locInfo->size(); i++) {
            if (IFailureMonitor::failureMonitor().onlyEndpointFailed(locInfo->get(i, member).getEndpoint())) {
                onlyEndpointFailed = true;
                break;
            }
        }

        if (onlyEndpointFailed) {
            cx->invalidateCache(range.begin);
            foundFailed = true;
        }
    }

    if (foundFailed) {
        return getKeyRangeLocations_internal(cx, keys, limit, reverse, info);
    }

    return locations;
}
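
// Editorial note (not from the original source): callers such as getExactRange() below use this helper to
// decompose a query range into per-shard requests. Since a returned pair may describe a whole shard rather
// than just the queried slice, callers intersect with the query range where needed, e.g.:
//
//   KeyRangeRef requestRange = locations[shard].first & keys; // hypothetical clamping step, for illustration
//
// before building a per-shard request against that shard's LocationInfo.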

ACTOR Future<Void> warmRange_impl(Transaction* self, Database cx, KeyRange keys) {
    state int totalRanges = 0;
    state int totalRequests = 0;
    loop {
        vector<pair<KeyRange, Reference<LocationInfo>>> locations =
            wait(getKeyRangeLocations_internal(cx, keys, CLIENT_KNOBS->WARM_RANGE_SHARD_LIMIT, false, self->info));
        totalRanges += CLIENT_KNOBS->WARM_RANGE_SHARD_LIMIT;
        totalRequests++;
        if (locations.size() == 0 || totalRanges >= cx->locationCacheSize ||
            locations[locations.size() - 1].first.end >= keys.end)
            break;

        keys = KeyRangeRef(locations[locations.size() - 1].first.end, keys.end);

        if (totalRequests % 20 == 0) {
            // To avoid blocking the proxies from starting other transactions, occasionally get a read version.
            state Transaction tr(cx);
            loop {
                try {
                    tr.setOption(FDBTransactionOptions::LOCK_AWARE);
                    tr.setOption(FDBTransactionOptions::CAUSAL_READ_RISKY);
                    wait(success(tr.getReadVersion()));
                    break;
                } catch (Error& e) {
                    wait(tr.onError(e));
                }
            }
        }
    }

    return Void();
}

Future<Void> Transaction::warmRange(Database cx, KeyRange keys) {
    return warmRange_impl(this, cx, keys);
}
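
// Editorial note (not from the original source): warming a range only populates the client's location cache
// ahead of time, so a latency-sensitive workload can pay the getKeyServersLocations round trips before real
// traffic starts, e.g. wait(tr.warmRange(cx, prefixRange(myPrefix))) during startup; `myPrefix` is a
// hypothetical application key prefix.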

ACTOR Future<Optional<Value>> getValue(Future<Version> version,
                                       Key key,
                                       Database cx,
                                       TransactionInfo info,
                                       Reference<TransactionLogInfo> trLogInfo,
                                       TagSet tags) {
    state Version ver = wait(version);
    state Span span("NAPI:getValue"_loc, info.spanID);
    span.addTag("key"_sr, key);
    cx->validateVersion(ver);

    loop {
        state pair<KeyRange, Reference<LocationInfo>> ssi =
            wait(getKeyLocation(cx, key, &StorageServerInterface::getValue, info));
        state Optional<UID> getValueID = Optional<UID>();
        state uint64_t startTime;
        state double startTimeD;
        try {
            if (info.debugID.present()) {
                getValueID = nondeterministicRandom()->randomUniqueID();

                g_traceBatch.addAttach("GetValueAttachID", info.debugID.get().first(), getValueID.get().first());
                g_traceBatch.addEvent("GetValueDebug",
                                      getValueID.get().first(),
                                      "NativeAPI.getValue.Before"); //.detail("TaskID", g_network->getCurrentTask());
                /*TraceEvent("TransactionDebugGetValueInfo", getValueID.get())
                    .detail("Key", key)
                    .detail("ReqVersion", ver)
                    .detail("Servers", describe(ssi.second->get()));*/
            }

            ++cx->getValueSubmitted;
            startTime = timer_int();
            startTimeD = now();
            ++cx->transactionPhysicalReads;

            state GetValueReply reply;
            try {
                if (CLIENT_BUGGIFY) {
                    throw deterministicRandom()->randomChoice(
                        std::vector<Error>{ transaction_too_old(), future_version() });
                }
                choose {
                    when(wait(cx->connectionFileChanged())) { throw transaction_too_old(); }
                    when(GetValueReply _reply = wait(loadBalance(
                             cx.getPtr(),
                             ssi.second,
                             &StorageServerInterface::getValue,
                             GetValueRequest(
                                 span.context, key, ver, cx->sampleReadTags() ? tags : Optional<TagSet>(), getValueID),
                             TaskPriority::DefaultPromiseEndpoint,
                             false,
                             cx->enableLocalityLoadBalance ? &cx->queueModel : nullptr))) {
                        reply = _reply;
                    }
                }
                ++cx->transactionPhysicalReadsCompleted;
            } catch (Error&) {
                ++cx->transactionPhysicalReadsCompleted;
                throw;
            }

            double latency = now() - startTimeD;
            cx->readLatencies.addSample(latency);
            if (trLogInfo) {
                int valueSize = reply.value.present() ? reply.value.get().size() : 0;
                trLogInfo->addLog(
                    FdbClientLogEvents::EventGet(startTimeD, cx->clientLocality.dcId(), latency, valueSize, key));
            }
            cx->getValueCompleted->latency = timer_int() - startTime;
            cx->getValueCompleted->log();

            if (info.debugID.present()) {
                g_traceBatch.addEvent("GetValueDebug",
                                      getValueID.get().first(),
                                      "NativeAPI.getValue.After"); //.detail("TaskID", g_network->getCurrentTask());
                /*TraceEvent("TransactionDebugGetValueDone", getValueID.get())
                    .detail("Key", key)
                    .detail("ReqVersion", ver)
                    .detail("ReplySize", reply.value.present() ? reply.value.get().size() : -1);*/
            }

            cx->transactionBytesRead += reply.value.present() ? reply.value.get().size() : 0;
            ++cx->transactionKeysRead;
            return reply.value;
        } catch (Error& e) {
            cx->getValueCompleted->latency = timer_int() - startTime;
            cx->getValueCompleted->log();
            if (info.debugID.present()) {
                g_traceBatch.addEvent("GetValueDebug",
                                      getValueID.get().first(),
                                      "NativeAPI.getValue.Error"); //.detail("TaskID", g_network->getCurrentTask());
                /*TraceEvent("TransactionDebugGetValueDone", getValueID.get())
                    .detail("Key", key)
                    .detail("ReqVersion", ver)
                    .detail("ReplySize", reply.value.present() ? reply.value.get().size() : -1);*/
            }
            if (e.code() == error_code_wrong_shard_server || e.code() == error_code_all_alternatives_failed ||
                (e.code() == error_code_transaction_too_old && ver == latestVersion)) {
                cx->invalidateCache(key);
                wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, info.taskID));
            } else {
                if (trLogInfo)
                    trLogInfo->addLog(FdbClientLogEvents::EventGetError(
                        startTimeD, cx->clientLocality.dcId(), static_cast<int>(e.code()), key));
                throw e;
            }
        }
    }
}
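
// Editorial note (not from the original source): the retry policy above is intentionally narrow. Only
// wrong_shard_server, all_alternatives_failed, and (for latestVersion reads) transaction_too_old are retried
// inside this actor, because those indicate a stale location cache rather than a transaction-level failure;
// every other error is surfaced so the caller's onError() loop can decide whether to retry.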

ACTOR Future<Key> getKey(Database cx, KeySelector k, Future<Version> version, TransactionInfo info, TagSet tags) {
    wait(success(version));

    state Optional<UID> getKeyID = Optional<UID>();
    state Span span("NAPI:getKey"_loc, info.spanID);
    if (info.debugID.present()) {
        getKeyID = nondeterministicRandom()->randomUniqueID();

        g_traceBatch.addAttach("GetKeyAttachID", info.debugID.get().first(), getKeyID.get().first());
        g_traceBatch.addEvent(
            "GetKeyDebug",
            getKeyID.get().first(),
            "NativeAPI.getKey.AfterVersion"); //.detail("StartKey",
                                              // k.getKey()).detail("Offset",k.offset).detail("OrEqual",k.orEqual);
    }

    loop {
        if (k.getKey() == allKeys.end) {
            if (k.offset > 0)
                return allKeys.end;
            k.orEqual = false;
        } else if (k.getKey() == allKeys.begin && k.offset <= 0) {
            return Key();
        }

        Key locationKey(k.getKey(), k.arena());
        state pair<KeyRange, Reference<LocationInfo>> ssi =
            wait(getKeyLocation(cx, locationKey, &StorageServerInterface::getKey, info, k.isBackward()));

        try {
            if (info.debugID.present())
                g_traceBatch.addEvent(
                    "GetKeyDebug",
                    getKeyID.get().first(),
                    "NativeAPI.getKey.Before"); //.detail("StartKey",
                                                // k.getKey()).detail("Offset",k.offset).detail("OrEqual",k.orEqual);
            ++cx->transactionPhysicalReads;
            state GetKeyReply reply;
            try {
                choose {
                    when(wait(cx->connectionFileChanged())) { throw transaction_too_old(); }
                    when(GetKeyReply _reply =
                             wait(loadBalance(cx.getPtr(),
                                              ssi.second,
                                              &StorageServerInterface::getKey,
                                              GetKeyRequest(span.context,
                                                            k,
                                                            version.get(),
                                                            cx->sampleReadTags() ? tags : Optional<TagSet>(),
                                                            getKeyID),
                                              TaskPriority::DefaultPromiseEndpoint,
                                              false,
                                              cx->enableLocalityLoadBalance ? &cx->queueModel : nullptr))) {
                        reply = _reply;
                    }
                }
                ++cx->transactionPhysicalReadsCompleted;
            } catch (Error&) {
                ++cx->transactionPhysicalReadsCompleted;
                throw;
            }
            if (info.debugID.present())
                g_traceBatch.addEvent("GetKeyDebug",
                                      getKeyID.get().first(),
                                      "NativeAPI.getKey.After"); //.detail("NextKey",reply.sel.key).detail("Offset",
                                                                 // reply.sel.offset).detail("OrEqual", k.orEqual);
            k = reply.sel;
            if (!k.offset && k.orEqual) {
                return k.getKey();
            }
        } catch (Error& e) {
            if (info.debugID.present())
                g_traceBatch.addEvent("GetKeyDebug", getKeyID.get().first(), "NativeAPI.getKey.Error");
            if (e.code() == error_code_wrong_shard_server || e.code() == error_code_all_alternatives_failed) {
                cx->invalidateCache(k.getKey(), k.isBackward());

                wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, info.taskID));
            } else {
                TraceEvent(SevInfo, "GetKeyError").error(e).detail("AtKey", k.getKey()).detail("Offset", k.offset);
                throw e;
            }
        }
    }
}
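
// Editorial note (not from the original source): a KeySelector is resolved iteratively. Each storage server
// applies as much of the offset as its own shard allows and hands back an updated selector (reply.sel); the
// loop terminates once the selector collapses to "orEqual with offset 0", i.e. an exact key. For example, a
// selector like firstGreaterThan(someKey) + 2 may need hops across two shards before it names a concrete key.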

ACTOR Future<Version> waitForCommittedVersion(Database cx, Version version, SpanID spanContext) {
    state Span span("NAPI:waitForCommittedVersion"_loc, { spanContext });
    try {
        loop {
            choose {
                when(wait(cx->onProxiesChanged())) {}
                when(GetReadVersionReply v =
                         wait(basicLoadBalance(cx->getGrvProxies(false),
                                               &GrvProxyInterface::getConsistentReadVersion,
                                               GetReadVersionRequest(span.context, 0, TransactionPriority::IMMEDIATE),
                                               cx->taskID))) {
                    cx->minAcceptableReadVersion = std::min(cx->minAcceptableReadVersion, v.version);
                    if (v.midShardSize > 0)
                        cx->smoothMidShardSize.setTotal(v.midShardSize);
                    if (v.version >= version)
                        return v.version;
                    // SOMEDAY: Do the wait on the server side, possibly use less expensive source of committed version
                    // (causal consistency is not needed for this purpose)
                    wait(delay(CLIENT_KNOBS->FUTURE_VERSION_RETRY_DELAY, cx->taskID));
                }
            }
        }
    } catch (Error& e) {
        TraceEvent(SevError, "WaitForCommittedVersionError").error(e);
        throw;
    }
}

ACTOR Future<Version> getRawVersion(Database cx, SpanID spanContext) {
    state Span span("NAPI:getRawVersion"_loc, { spanContext });
    loop {
        choose {
            when(wait(cx->onProxiesChanged())) {}
            when(GetReadVersionReply v =
                     wait(basicLoadBalance(cx->getGrvProxies(false),
                                           &GrvProxyInterface::getConsistentReadVersion,
                                           GetReadVersionRequest(spanContext, 0, TransactionPriority::IMMEDIATE),
                                           cx->taskID))) {
                return v.version;
            }
        }
    }
}
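
// Editorial note (not from the original source): both helpers above fetch an IMMEDIATE-priority read version
// from the GRV proxies. waitForCommittedVersion() additionally polls until the returned version catches up to
// a target version, which is what the watch path below relies on, while getRawVersion() simply returns the
// first version it sees.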

ACTOR Future<Void> readVersionBatcher(
    DatabaseContext* cx,
    FutureStream<std::pair<Promise<GetReadVersionReply>, Optional<UID>>> versionStream,
    uint32_t flags);

ACTOR Future<Version> watchValue(Future<Version> version,
                                 Key key,
                                 Optional<Value> value,
                                 Database cx,
                                 TransactionInfo info,
                                 TagSet tags) {
    state Version ver = wait(version);
    state Span span("NAPI:watchValue"_loc, info.spanID);
    cx->validateVersion(ver);
    ASSERT(ver != latestVersion);

    loop {
        state pair<KeyRange, Reference<LocationInfo>> ssi =
            wait(getKeyLocation(cx, key, &StorageServerInterface::watchValue, info));

        try {
            state Optional<UID> watchValueID = Optional<UID>();
            if (info.debugID.present()) {
                watchValueID = nondeterministicRandom()->randomUniqueID();

                g_traceBatch.addAttach("WatchValueAttachID", info.debugID.get().first(), watchValueID.get().first());
                g_traceBatch.addEvent("WatchValueDebug",
                                      watchValueID.get().first(),
                                      "NativeAPI.watchValue.Before"); //.detail("TaskID", g_network->getCurrentTask());
            }
            state WatchValueReply resp;
            choose {
                when(WatchValueReply r =
                         wait(loadBalance(cx.getPtr(),
                                          ssi.second,
                                          &StorageServerInterface::watchValue,
                                          WatchValueRequest(span.context,
                                                            key,
                                                            value,
                                                            ver,
                                                            cx->sampleReadTags() ? tags : Optional<TagSet>(),
                                                            watchValueID),
                                          TaskPriority::DefaultPromiseEndpoint))) {
                    resp = r;
                }
                when(wait(cx->connectionFile ? cx->connectionFile->onChange() : Never())) { wait(Never()); }
            }
            if (info.debugID.present()) {
                g_traceBatch.addEvent("WatchValueDebug",
                                      watchValueID.get().first(),
                                      "NativeAPI.watchValue.After"); //.detail("TaskID", g_network->getCurrentTask());
            }

            // FIXME: wait for known committed version on the storage server before replying,
            // cannot do this until the storage server is notified on knownCommittedVersion changes from tlog (faster
            // than the current update loop)
            Version v = wait(waitForCommittedVersion(cx, resp.version, span.context));

            //TraceEvent("WatcherCommitted").detail("CommittedVersion", v).detail("WatchVersion", resp.version).detail("Key", key).detail("Value", value);

            // False if there is a master failure between getting the response and getting the committed version,
            // Dependent on SERVER_KNOBS->MAX_VERSIONS_IN_FLIGHT
            if (v - resp.version < 50000000) {
                return resp.version;
            }
            ver = v;
        } catch (Error& e) {
            if (e.code() == error_code_wrong_shard_server || e.code() == error_code_all_alternatives_failed) {
                cx->invalidateCache(key);
                wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, info.taskID));
            } else if (e.code() == error_code_watch_cancelled || e.code() == error_code_process_behind) {
                // clang-format off
                TEST(e.code() == error_code_watch_cancelled); // Too many watches on the storage server, poll for changes instead
                TEST(e.code() == error_code_process_behind); // The storage servers are all behind
                // clang-format on
                wait(delay(CLIENT_KNOBS->WATCH_POLLING_TIME, info.taskID));
            } else if (e.code() == error_code_timed_out) { // The storage server occasionally times out watches in case
                                                           // it was cancelled
                TEST(true); // A watch timed out
                wait(delay(CLIENT_KNOBS->FUTURE_VERSION_RETRY_DELAY, info.taskID));
            } else {
                state Error err = e;
                wait(delay(CLIENT_KNOBS->FUTURE_VERSION_RETRY_DELAY, info.taskID));
                throw err;
            }
        }
    }
}
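
// Editorial note (not from the original source): the 50000000-version threshold above is a heuristic.
// Versions normally advance at roughly one million per second, so a gap of 50 million versions between the
// committed version and the watch's version suggests a recovery happened in between, in which case the watch
// is re-issued at the newer version rather than trusted.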

ACTOR Future<Void> watchStorageServerResp(KeyRef key, Database cx) {
    loop {
        try {
            state Reference<WatchMetadata> metadata = cx->getWatchMetadata(key);
            if (!metadata.isValid())
                return Void();

            Version watchVersion = wait(watchValue(Future<Version>(metadata->version),
                                                   metadata->key,
                                                   metadata->value,
                                                   cx,
                                                   metadata->info,
                                                   metadata->tags));

            metadata = cx->getWatchMetadata(key);
            if (!metadata.isValid())
                return Void();

            if (watchVersion >= metadata->version) { // case 1: version_1 (SS) >= version_2 (map)
                cx->deleteWatchMetadata(key);
                if (metadata->watchPromise.canBeSet())
                    metadata->watchPromise.send(watchVersion);
            } else { // ABA happens
                TEST(true); // ABA issue where the version returned from the server is less than the version in the map
                if (metadata->watchPromise.getFutureReferenceCount() ==
                    1) { // case 2: version_1 < version_2 and future_count == 1
                    cx->deleteWatchMetadata(key);
                }
            }
        } catch (Error& e) {
            if (e.code() == error_code_operation_cancelled) {
                throw e;
            }

            Reference<WatchMetadata> metadata = cx->getWatchMetadata(key);
            if (!metadata.isValid()) {
                return Void();
            } else if (metadata->watchPromise.getFutureReferenceCount() == 1) {
                cx->deleteWatchMetadata(key);
                return Void();
            } else if (e.code() == error_code_future_version) {
                continue;
            }
            cx->deleteWatchMetadata(key);
            metadata->watchPromise.sendError(e);
            throw e;
        }
    }
}
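
// Editorial note (not from the original source): getFutureReferenceCount() == 1 means the only outstanding
// reference to the watch future is the one held in the metadata map itself, i.e. every caller has dropped its
// copy. In that case the entry can be deleted quietly instead of delivering a result nobody is waiting for.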

ACTOR Future<Void> sameVersionDiffValue(Version ver,
                                        Key key,
                                        Optional<Value> value,
                                        Database cx,
                                        TransactionInfo info,
                                        TagSet tags) {
    state ReadYourWritesTransaction tr(cx);
    loop {
        try {
            tr.setOption(FDBTransactionOptions::READ_SYSTEM_KEYS);
            state Optional<Value> valSS = wait(tr.get(key));
            Reference<WatchMetadata> metadata = cx->getWatchMetadata(key.contents());

            if (metadata.isValid() &&
                valSS != metadata->value) { // val_3 != val_1 (storage server value doesn't match value in map)
                cx->deleteWatchMetadata(key.contents());

                metadata->watchPromise.send(ver);
                metadata->watchFutureSS.cancel();
            }

            if (valSS ==
                value) { // val_3 == val_2 (storage server value matches value passed into the function -> new watch)
                metadata = makeReference<WatchMetadata>(key, value, ver, info, tags);
                KeyRef keyRef = cx->setWatchMetadata(metadata);

                metadata->watchFutureSS = watchStorageServerResp(keyRef, cx);
            }

            if (valSS != value)
                return Void(); // if val_3 != val_2

            wait(success(metadata->watchPromise.getFuture())); // val_3 == val_2

            return Void();
        } catch (Error& e) {
            wait(tr.onError(e));
        }
    }
}

Future<Void> getWatchFuture(Version ver,
                            Key key,
                            Optional<Value> value,
                            Database cx,
                            TransactionInfo info,
                            TagSet tags) {
    Reference<WatchMetadata> metadata = cx->getWatchMetadata(key.contents());

    if (!metadata.isValid()) { // case 1: key not in map
        metadata = makeReference<WatchMetadata>(key, value, ver, info, tags);
        KeyRef keyRef = cx->setWatchMetadata(metadata);

        metadata->watchFutureSS = watchStorageServerResp(keyRef, cx);
        return success(metadata->watchPromise.getFuture());
    } else if (metadata->value == value) { // case 2: val_1 == val_2 (received watch with same value as key already in
                                           // the map so just update)
        if (ver > metadata->version) {
            metadata->version = ver;
            metadata->info = info;
            metadata->tags = tags;
        }

        return success(metadata->watchPromise.getFuture());
    } else if (ver > metadata->version) { // case 3: val_1 != val_2 && version_2 > version_1 (received watch with
                                          // different value and a higher version so recreate in SS)
        TEST(true); // Setting a watch that has a different value than the one in the map but a higher version (newer)
        cx->deleteWatchMetadata(key.contents());

        metadata->watchPromise.send(ver);
        metadata->watchFutureSS.cancel();

        metadata = makeReference<WatchMetadata>(key, value, ver, info, tags);
        KeyRef keyRef = cx->setWatchMetadata(metadata);

        metadata->watchFutureSS = watchStorageServerResp(keyRef, cx);

        return success(metadata->watchPromise.getFuture());
    } else if (metadata->version == ver) { // case 5: val_1 != val_2 && version_1 == version_2 (received watch with
                                           // different value but same version)
        TEST(true); // Setting a watch which has a different value than the one in the map but the same version
        return sameVersionDiffValue(ver, key, value, cx, info, tags);
    }
    TEST(true); // Setting a watch which has a different value than the one in the map but a lower version (older)
    // case 4: val_1 != val_2 && version_2 < version_1
    return Void();
}
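
// Editorial summary (not from the original source) of the case analysis above, with val_1/version_1 being
// the entry already in the map and val_2/version_2 the incoming watch:
//   case 1: no entry            -> create metadata, start the storage server watch
//   case 2: same value          -> share the existing future (bump version/info/tags if newer)
//   case 3: diff value, newer   -> fire the old future at ver and recreate the storage server watch
//   case 5: diff value, same    -> consult the storage server via sameVersionDiffValue()
//   case 4: diff value, older   -> already superseded, complete immediately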

ACTOR Future<Void> watchValueMap(Future<Version> version,
                                 Key key,
                                 Optional<Value> value,
                                 Database cx,
                                 TransactionInfo info,
                                 TagSet tags) {
    state Version ver = wait(version);
    wait(getWatchFuture(ver, key, value, cx, info, tags));
    return Void();
}

void transformRangeLimits(GetRangeLimits limits, bool reverse, GetKeyValuesRequest& req) {
    if (limits.bytes != 0) {
        if (!limits.hasRowLimit())
            req.limit = CLIENT_KNOBS->REPLY_BYTE_LIMIT; // Can't get more than this many rows anyway
        else
            req.limit = std::min(CLIENT_KNOBS->REPLY_BYTE_LIMIT, limits.rows);

        if (reverse)
            req.limit *= -1;

        if (!limits.hasByteLimit())
            req.limitBytes = CLIENT_KNOBS->REPLY_BYTE_LIMIT;
        else
            req.limitBytes = std::min(CLIENT_KNOBS->REPLY_BYTE_LIMIT, limits.bytes);
    } else {
        req.limitBytes = CLIENT_KNOBS->REPLY_BYTE_LIMIT;
        req.limit = reverse ? -limits.minRows : limits.minRows;
    }
}
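
// Worked example (editorial, assuming CLIENT_KNOBS->REPLY_BYTE_LIMIT is at least 10000): for a reverse read
// with limits.rows == 1000 and limits.bytes == 10000, the request is encoded as req.limit == -1000 and
// req.limitBytes == 10000; the sign of req.limit carries the read direction to the storage server.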

ACTOR Future<RangeResult> getExactRange(Database cx,
                                        Version version,
                                        KeyRange keys,
                                        GetRangeLimits limits,
                                        bool reverse,
                                        TransactionInfo info,
                                        TagSet tags) {
    state RangeResult output;
    state Span span("NAPI:getExactRange"_loc, info.spanID);

    // printf("getExactRange( '%s', '%s' )\n", keys.begin.toString().c_str(), keys.end.toString().c_str());
    loop {
        state vector<pair<KeyRange, Reference<LocationInfo>>> locations = wait(getKeyRangeLocations(
            cx, keys, CLIENT_KNOBS->GET_RANGE_SHARD_LIMIT, reverse, &StorageServerInterface::getKeyValues, info));
        ASSERT(locations.size());
        state int shard = 0;
        loop {
            const KeyRangeRef& range = locations[shard].first;

            GetKeyValuesRequest req;
            req.version = version;
            req.begin = firstGreaterOrEqual(range.begin);
            req.end = firstGreaterOrEqual(range.end);
            req.spanContext = span.context;

            transformRangeLimits(limits, reverse, req);
            ASSERT(req.limitBytes > 0 && req.limit != 0 && req.limit < 0 == reverse);

            // FIXME: buggify byte limits on internal functions that use them, instead of globally
            req.tags = cx->sampleReadTags() ? tags : Optional<TagSet>();
            req.debugID = info.debugID;

            try {
                if (info.debugID.present()) {
                    g_traceBatch.addEvent(
                        "TransactionDebug", info.debugID.get().first(), "NativeAPI.getExactRange.Before");
                    /*TraceEvent("TransactionDebugGetExactRangeInfo", info.debugID.get())
                        .detail("ReqBeginKey", req.begin.getKey())
                        .detail("ReqEndKey", req.end.getKey())
                        .detail("ReqLimit", req.limit)
                        .detail("ReqLimitBytes", req.limitBytes)
                        .detail("ReqVersion", req.version)
                        .detail("Reverse", reverse)
                        .detail("Servers", locations[shard].second->description());*/
                }
                ++cx->transactionPhysicalReads;
                state GetKeyValuesReply rep;
                try {
                    choose {
                        when(wait(cx->connectionFileChanged())) { throw transaction_too_old(); }
                        when(GetKeyValuesReply _rep =
                                 wait(loadBalance(cx.getPtr(),
                                                  locations[shard].second,
                                                  &StorageServerInterface::getKeyValues,
                                                  req,
                                                  TaskPriority::DefaultPromiseEndpoint,
                                                  false,
                                                  cx->enableLocalityLoadBalance ? &cx->queueModel : nullptr))) {
                            rep = _rep;
                        }
                    }
                    ++cx->transactionPhysicalReadsCompleted;
                } catch (Error&) {
                    ++cx->transactionPhysicalReadsCompleted;
                    throw;
                }
                if (info.debugID.present())
                    g_traceBatch.addEvent(
                        "TransactionDebug", info.debugID.get().first(), "NativeAPI.getExactRange.After");
                output.arena().dependsOn(rep.arena);
                output.append(output.arena(), rep.data.begin(), rep.data.size());

                if (limits.hasRowLimit() && rep.data.size() > limits.rows) {
                    TraceEvent(SevError, "GetExactRangeTooManyRows")
                        .detail("RowLimit", limits.rows)
                        .detail("DeliveredRows", output.size());
                    ASSERT(false);
                }
                limits.decrement(rep.data);

                if (limits.isReached()) {
                    output.more = true;
                    return output;
                }

                bool more = rep.more;
                // If the reply says there is more but we know that we finished the shard, then fix rep.more
                if (reverse && more && rep.data.size() > 0 &&
                    output[output.size() - 1].key == locations[shard].first.begin)
                    more = false;

                if (more) {
                    if (!rep.data.size()) {
                        TraceEvent(SevError, "GetExactRangeError")
                            .detail("Reason", "More data indicated but no rows present")
                            .detail("LimitBytes", limits.bytes)
                            .detail("LimitRows", limits.rows)
                            .detail("OutputSize", output.size())
                            .detail("OutputBytes", output.expectedSize())
                            .detail("BlockSize", rep.data.size())
                            .detail("BlockBytes", rep.data.expectedSize());
                        ASSERT(false);
                    }
                    TEST(true); // GetKeyValuesReply.more in getExactRange
                    // Make next request to the same shard with a beginning key just after the last key returned
                    if (reverse)
                        locations[shard].first =
                            KeyRangeRef(locations[shard].first.begin, output[output.size() - 1].key);
                    else
                        locations[shard].first =
                            KeyRangeRef(keyAfter(output[output.size() - 1].key), locations[shard].first.end);
                }

                if (!more || locations[shard].first.empty()) {
                    TEST(true); // getExactRange (!more || locations[shard].first.empty())
                    if (shard == locations.size() - 1) {
                        const KeyRangeRef& range = locations[shard].first;
                        KeyRef begin = reverse ? keys.begin : range.end;
                        KeyRef end = reverse ? range.begin : keys.end;

                        if (begin >= end) {
                            output.more = false;
                            return output;
                        }
                        TEST(true); // Multiple requests of key locations

                        keys = KeyRangeRef(begin, end);
                        break;
                    }

                    ++shard;
                }

                // Soft byte limit - return results early if the user specified a byte limit and we got results
                // This can prevent problems where the desired range spans many shards and would be too slow to
                // fetch entirely.
                if (limits.hasSatisfiedMinRows() && output.size() > 0) {
                    output.more = true;
                    return output;
                }

            } catch (Error& e) {
                if (e.code() == error_code_wrong_shard_server || e.code() == error_code_all_alternatives_failed) {
                    const KeyRangeRef& range = locations[shard].first;

                    if (reverse)
                        keys = KeyRangeRef(keys.begin, range.end);
                    else
                        keys = KeyRangeRef(range.begin, keys.end);

                    cx->invalidateCache(keys);
                    wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, info.taskID));
                    break;
                } else {
                    TraceEvent(SevInfo, "GetExactRangeError")
                        .error(e)
                        .detail("ShardBegin", locations[shard].first.begin)
                        .detail("ShardEnd", locations[shard].first.end);
                    throw;
                }
            }
        }
    }
}
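
// Editorial note (not from the original source): unlike getRange() below, getExactRange() never adjusts the
// caller's key selectors; it requires pre-resolved keys and walks shard by shard, re-resolving locations only
// when the location cache turns out to be stale (wrong_shard_server / all_alternatives_failed).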

Future<Key> resolveKey(Database const& cx,
                       KeySelector const& key,
                       Version const& version,
                       TransactionInfo const& info,
                       TagSet const& tags) {
    if (key.isFirstGreaterOrEqual())
        return Future<Key>(key.getKey());

    if (key.isFirstGreaterThan())
        return Future<Key>(keyAfter(key.getKey()));

    return getKey(cx, key, version, info, tags);
}
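
// Editorial note (not from the original source): the two fast paths avoid a network round trip entirely.
// firstGreaterOrEqual(k) resolves to k itself and firstGreaterThan(k) to keyAfter(k), i.e. k with a trailing
// zero byte; only selectors with other offsets fall through to the getKey() actor above.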

ACTOR Future<RangeResult> getRangeFallback(Database cx,
                                           Version version,
                                           KeySelector begin,
                                           KeySelector end,
                                           GetRangeLimits limits,
                                           bool reverse,
                                           TransactionInfo info,
                                           TagSet tags) {
    if (version == latestVersion) {
        state Transaction transaction(cx);
        transaction.setOption(FDBTransactionOptions::CAUSAL_READ_RISKY);
        transaction.setOption(FDBTransactionOptions::LOCK_AWARE);
        transaction.setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
        Version ver = wait(transaction.getReadVersion());
        version = ver;
    }

    Future<Key> fb = resolveKey(cx, begin, version, info, tags);
    state Future<Key> fe = resolveKey(cx, end, version, info, tags);

    state Key b = wait(fb);
    state Key e = wait(fe);
    if (b >= e) {
        return RangeResult();
    }

    // if e is allKeys.end, we have read through the end of the database
    // if b is allKeys.begin, we have either read through the beginning of the database,
    // or allKeys.begin exists in the database and will be part of the conflict range anyways

    RangeResult _r = wait(getExactRange(cx, version, KeyRangeRef(b, e), limits, reverse, info, tags));
    RangeResult r = _r;

    if (b == allKeys.begin && ((reverse && !r.more) || !reverse))
        r.readToBegin = true;
    if (e == allKeys.end && ((!reverse && !r.more) || reverse))
        r.readThroughEnd = true;

    ASSERT(!limits.hasRowLimit() || r.size() <= limits.rows);

    // If we were limiting bytes and the returned range is twice the request (plus 10K) log a warning
    if (limits.hasByteLimit() &&
        r.expectedSize() >
            size_t(limits.bytes + CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT + CLIENT_KNOBS->VALUE_SIZE_LIMIT + 1) &&
        limits.minRows == 0) {
        TraceEvent(SevWarnAlways, "GetRangeFallbackTooMuchData")
            .detail("LimitBytes", limits.bytes)
            .detail("DeliveredBytes", r.expectedSize())
            .detail("LimitRows", limits.rows)
            .detail("DeliveredRows", r.size());
    }

    return r;
}

void getRangeFinished(Database cx,
                      Reference<TransactionLogInfo> trLogInfo,
                      double startTime,
                      KeySelector begin,
                      KeySelector end,
                      bool snapshot,
                      Promise<std::pair<Key, Key>> conflictRange,
                      bool reverse,
                      RangeResult result) {
    int64_t bytes = 0;
    for (const KeyValueRef& kv : result) {
        bytes += kv.key.size() + kv.value.size();
    }

    cx->transactionBytesRead += bytes;
    cx->transactionKeysRead += result.size();

    if (trLogInfo) {
        trLogInfo->addLog(FdbClientLogEvents::EventGetRange(
            startTime, cx->clientLocality.dcId(), now() - startTime, bytes, begin.getKey(), end.getKey()));
    }

    if (!snapshot) {
        Key rangeBegin;
        Key rangeEnd;

        if (result.readToBegin) {
            rangeBegin = allKeys.begin;
        } else if (((!reverse || !result.more || begin.offset > 1) && begin.offset > 0) || result.size() == 0) {
            rangeBegin = Key(begin.getKey(), begin.arena());
        } else {
            rangeBegin = reverse ? result.end()[-1].key : result[0].key;
        }

        if (end.offset > begin.offset && end.getKey() < rangeBegin) {
            rangeBegin = Key(end.getKey(), end.arena());
        }

        if (result.readThroughEnd) {
            rangeEnd = allKeys.end;
        } else if (((reverse || !result.more || end.offset <= 0) && end.offset <= 1) || result.size() == 0) {
            rangeEnd = Key(end.getKey(), end.arena());
        } else {
            rangeEnd = keyAfter(reverse ? result[0].key : result.end()[-1].key);
        }

        if (begin.offset < end.offset && begin.getKey() > rangeEnd) {
            rangeEnd = Key(begin.getKey(), begin.arena());
        }

        conflictRange.send(std::make_pair(rangeBegin, rangeEnd));
    }
}
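
// Editorial note (not from the original source): for non-snapshot reads the conflict range must cover every
// key whose presence or absence the caller could have observed, not just the keys actually returned. That is
// why an unresolved selector widens the range out to the selector's own key, and readToBegin/readThroughEnd
// widen it to the ends of the keyspace.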
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2021-05-04 04:14:16 +08:00
|
|
|
ACTOR Future<RangeResult> getRange(Database cx,
|
|
|
|
Reference<TransactionLogInfo> trLogInfo,
|
|
|
|
Future<Version> fVersion,
|
|
|
|
KeySelector begin,
|
|
|
|
KeySelector end,
|
|
|
|
GetRangeLimits limits,
|
|
|
|
Promise<std::pair<Key, Key>> conflictRange,
|
|
|
|
bool snapshot,
|
|
|
|
bool reverse,
|
|
|
|
TransactionInfo info,
|
|
|
|
TagSet tags) {
|
2021-03-11 02:06:03 +08:00
|
|
|
state GetRangeLimits originalLimits(limits);
|
2017-11-03 04:39:06 +08:00
|
|
|
state KeySelector originalBegin = begin;
|
|
|
|
state KeySelector originalEnd = end;
|
2021-05-04 04:14:16 +08:00
|
|
|
state RangeResult output;
|
2020-07-10 01:49:33 +08:00
|
|
|
state Span span("NAPI:getRange"_loc, info.spanID);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2017-11-03 04:39:06 +08:00
|
|
|
try {
|
2021-03-11 02:06:03 +08:00
|
|
|
state Version version = wait(fVersion);
|
2019-04-23 06:48:47 +08:00
|
|
|
cx->validateVersion(version);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2017-11-03 04:39:06 +08:00
|
|
|
state double startTime = now();
|
2021-03-11 02:06:03 +08:00
|
|
|
state Version readVersion = version; // Needed for latestVersion requests; if more, make future requests at the
|
|
|
|
// version that the first one completed
|
|
|
|
// FIXME: Is this really right? Weaken this and see if there is a problem;
|
|
|
|
// if so maybe there is a much subtler problem even with this.
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if (begin.getKey() == allKeys.begin && begin.offset < 1) {
|
2017-11-03 04:39:06 +08:00
|
|
|
output.readToBegin = true;
|
2021-03-11 02:06:03 +08:00
|
|
|
begin = KeySelector(firstGreaterOrEqual(begin.getKey()), begin.arena());
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
ASSERT(!limits.isReached());
|
|
|
|
ASSERT((!limits.hasRowLimit() || limits.rows >= limits.minRows) && limits.minRows >= 0);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2017-11-03 04:39:06 +08:00
|
|
|
loop {
|
2021-03-11 02:06:03 +08:00
|
|
|
if (end.getKey() == allKeys.begin && (end.offset < 1 || end.isFirstGreaterOrEqual())) {
|
|
|
|
getRangeFinished(
|
|
|
|
cx, trLogInfo, startTime, originalBegin, originalEnd, snapshot, conflictRange, reverse, output);
|
2017-11-03 04:39:06 +08:00
|
|
|
return output;
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
|
2018-01-07 05:49:47 +08:00
|
|
|
Key locationKey = reverse ? Key(end.getKey(), end.arena()) : Key(begin.getKey(), begin.arena());
|
2021-03-11 02:06:03 +08:00
|
|
|
bool locationBackward = reverse ? (end - 1).isBackward() : begin.isBackward();
|
|
|
|
state pair<KeyRange, Reference<LocationInfo>> beginServer =
|
|
|
|
wait(getKeyLocation(cx, locationKey, &StorageServerInterface::getKeyValues, info, locationBackward));
|
2017-11-03 04:39:06 +08:00
|
|
|
state KeyRange shard = beginServer.first;
|
|
|
|
state bool modifiedSelectors = false;
|
|
|
|
state GetKeyValuesRequest req;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2019-07-10 01:08:27 +08:00
|
|
|
req.isFetchKeys = (info.taskID == TaskPriority::FetchKeys);
|
2017-11-03 04:39:06 +08:00
|
|
|
req.version = readVersion;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if (reverse && (begin - 1).isDefinitelyLess(shard.begin) &&
|
|
|
|
(!begin.isFirstGreaterOrEqual() ||
|
|
|
|
begin.getKey() != shard.begin)) { // In this case we would be setting modifiedSelectors to true, but
|
|
|
|
// not modifying anything
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
req.begin = firstGreaterOrEqual(shard.begin);
|
2017-11-03 04:39:06 +08:00
|
|
|
modifiedSelectors = true;
|
2021-03-11 02:06:03 +08:00
|
|
|
} else
|
|
|
|
req.begin = begin;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2021-03-11 02:06:03 +08:00
|
|
|
if (!reverse && end.isDefinitelyGreater(shard.end)) {
|
|
|
|
req.end = firstGreaterOrEqual(shard.end);
|
2017-11-03 04:39:06 +08:00
|
|
|
modifiedSelectors = true;
|
2021-03-11 02:06:03 +08:00
|
|
|
} else
|
|
|
|
req.end = end;
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2017-11-03 04:39:06 +08:00
|
|
|
transformRangeLimits(limits, reverse, req);
|
|
|
|
ASSERT(req.limitBytes > 0 && req.limit != 0 && req.limit < 0 == reverse);
|
2017-05-26 04:48:44 +08:00
|
|
|
|
2020-04-11 04:29:28 +08:00
|
|
|
req.tags = cx->sampleReadTags() ? tags : Optional<TagSet>();
|
2017-11-03 04:39:06 +08:00
|
|
|
req.debugID = info.debugID;
|
2020-07-10 01:49:33 +08:00
|
|
|
req.spanContext = span.context;
|
2017-11-03 04:39:06 +08:00
|
|
|
try {
|
2021-03-11 02:06:03 +08:00
|
|
|
if (info.debugID.present()) {
|
2017-11-03 04:39:06 +08:00
|
|
|
g_traceBatch.addEvent("TransactionDebug", info.debugID.get().first(), "NativeAPI.getRange.Before");
|
|
|
|
/*TraceEvent("TransactionDebugGetRangeInfo", info.debugID.get())
|
2021-03-11 02:06:03 +08:00
|
|
|
.detail("ReqBeginKey", req.begin.getKey())
|
|
|
|
.detail("ReqEndKey", req.end.getKey())
|
|
|
|
.detail("OriginalBegin", originalBegin.toString())
|
|
|
|
.detail("OriginalEnd", originalEnd.toString())
|
|
|
|
.detail("Begin", begin.toString())
|
|
|
|
.detail("End", end.toString())
|
|
|
|
.detail("Shard", shard)
|
|
|
|
.detail("ReqLimit", req.limit)
|
|
|
|
.detail("ReqLimitBytes", req.limitBytes)
|
|
|
|
.detail("ReqVersion", req.version)
|
|
|
|
.detail("Reverse", reverse)
|
|
|
|
.detail("ModifiedSelectors", modifiedSelectors)
|
|
|
|
.detail("Servers", beginServer.second->description());*/
|
2017-05-26 04:48:44 +08:00
|
|
|
}
|
|
|
|
|
2018-02-08 03:56:47 +08:00
|
|
|
++cx->transactionPhysicalReads;
|
2020-03-06 06:00:44 +08:00
|
|
|
state GetKeyValuesReply rep;
|
|
|
|
try {
|
|
|
|
if (CLIENT_BUGGIFY) {
|
2021-03-11 02:06:03 +08:00
|
|
|
throw deterministicRandom()->randomChoice(
|
|
|
|
std::vector<Error>{ transaction_too_old(), future_version() });
|
2020-03-06 06:00:44 +08:00
|
|
|
}
|
2021-04-08 01:59:45 +08:00
|
|
|
state AnnotateActor annotation(currentLineage);
					GetKeyValuesReply _rep =
					    wait(loadBalance(cx.getPtr(),
					                     beginServer.second,
					                     &StorageServerInterface::getKeyValues,
					                     req,
					                     TaskPriority::DefaultPromiseEndpoint,
					                     false,
					                     cx->enableLocalityLoadBalance ? &cx->queueModel : nullptr));
					rep = _rep;
					++cx->transactionPhysicalReadsCompleted;
				} catch (Error&) {
					++cx->transactionPhysicalReadsCompleted;
					throw;
				}

				if (info.debugID.present()) {
					g_traceBatch.addEvent("TransactionDebug",
					                      info.debugID.get().first(),
					                      "NativeAPI.getRange.After"); //.detail("SizeOf", rep.data.size());
					/*TraceEvent("TransactionDebugGetRangeDone", info.debugID.get())
					    .detail("ReqBeginKey", req.begin.getKey())
					    .detail("ReqEndKey", req.end.getKey())
					    .detail("RepIsMore", rep.more)
					    .detail("VersionReturned", rep.version)
					    .detail("RowsReturned", rep.data.size());*/
				}

				ASSERT(!rep.more || rep.data.size());
				ASSERT(!limits.hasRowLimit() || rep.data.size() <= limits.rows);

				limits.decrement(rep.data);

				if (reverse && begin.isLastLessOrEqual() && rep.data.size() &&
				    rep.data.end()[-1].key == begin.getKey()) {
					modifiedSelectors = false;
				}
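
				// This iteration is the last one if a row/byte limit was hit, if the minimum row
				// count has been satisfied, or if the server returned everything (!rep.more) and
				// the selectors never had to be clamped to this shard (!modifiedSelectors).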
				bool finished = limits.isReached() || (!modifiedSelectors && !rep.more) || limits.hasSatisfiedMinRows();
				bool readThrough = modifiedSelectors && !rep.more;

				// optimization: first request got all data--just return it
				if (finished && !output.size()) {
					bool readToBegin = output.readToBegin;
					bool readThroughEnd = output.readThroughEnd;

					output = RangeResult(RangeResultRef(rep.data, modifiedSelectors || limits.isReached() || rep.more),
					                     rep.arena);
					output.readToBegin = readToBegin;
					output.readThroughEnd = readThroughEnd;

					if (BUGGIFY && limits.hasByteLimit() && output.size() > std::max(1, originalLimits.minRows)) {
						output.more = true;
						output.resize(
						    output.arena(),
						    deterministicRandom()->randomInt(std::max(1, originalLimits.minRows), output.size()));
						getRangeFinished(cx,
						                 trLogInfo,
						                 startTime,
						                 originalBegin,
						                 originalEnd,
						                 snapshot,
						                 conflictRange,
						                 reverse,
						                 output);
						return output;
					}

					if (readThrough) {
						output.arena().dependsOn(shard.arena());
						output.readThrough = reverse ? shard.begin : shard.end;
					}

					getRangeFinished(
					    cx, trLogInfo, startTime, originalBegin, originalEnd, snapshot, conflictRange, reverse, output);
					return output;
				}

				output.arena().dependsOn(rep.arena);
				output.append(output.arena(), rep.data.begin(), rep.data.size());

				if (finished) {
					if (readThrough) {
						output.arena().dependsOn(shard.arena());
						output.readThrough = reverse ? shard.begin : shard.end;
					}
					output.more = modifiedSelectors || limits.isReached() || rep.more;

					getRangeFinished(
					    cx, trLogInfo, startTime, originalBegin, originalEnd, snapshot, conflictRange, reverse, output);
					return output;
				}

				readVersion = rep.version; // see above comment
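
				// Not finished: either advance the selectors past the data already read and go
				// around the loop again, or (if the shard was exhausted without returning data)
				// fall back to getRangeFallback, which resolves the original selectors directly.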
				if (!rep.more) {
					ASSERT(modifiedSelectors);
					TEST(true); // !GetKeyValuesReply.more and modifiedSelectors in getRange

					if (!rep.data.size()) {
						RangeResult result = wait(getRangeFallback(
						    cx, version, originalBegin, originalEnd, originalLimits, reverse, info, tags));
						getRangeFinished(cx,
						                 trLogInfo,
						                 startTime,
						                 originalBegin,
						                 originalEnd,
						                 snapshot,
						                 conflictRange,
						                 reverse,
						                 result);
						return result;
					}

					if (reverse)
						end = firstGreaterOrEqual(shard.begin);
					else
						begin = firstGreaterOrEqual(shard.end);
				} else {
					TEST(true); // GetKeyValuesReply.more in getRange
					if (reverse)
						end = firstGreaterOrEqual(output[output.size() - 1].key);
					else
						begin = firstGreaterThan(output[output.size() - 1].key);
				}

			} catch (Error& e) {
				if (info.debugID.present()) {
					g_traceBatch.addEvent("TransactionDebug", info.debugID.get().first(), "NativeAPI.getRange.Error");
					TraceEvent("TransactionDebugError", info.debugID.get()).error(e);
				}
				if (e.code() == error_code_wrong_shard_server || e.code() == error_code_all_alternatives_failed ||
				    (e.code() == error_code_transaction_too_old && readVersion == latestVersion)) {
					cx->invalidateCache(reverse ? end.getKey() : begin.getKey(),
					                    reverse ? (end - 1).isBackward() : begin.isBackward());

					if (e.code() == error_code_wrong_shard_server) {
						RangeResult result = wait(getRangeFallback(
						    cx, version, originalBegin, originalEnd, originalLimits, reverse, info, tags));
						getRangeFinished(cx,
						                 trLogInfo,
						                 startTime,
						                 originalBegin,
						                 originalEnd,
						                 snapshot,
						                 conflictRange,
						                 reverse,
						                 result);
						return result;
					}

					wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, info.taskID));
				} else {
					if (trLogInfo)
						trLogInfo->addLog(FdbClientLogEvents::EventGetRangeError(startTime,
						                                                         cx->clientLocality.dcId(),
						                                                         static_cast<int>(e.code()),
						                                                         begin.getKey(),
						                                                         end.getKey()));

					throw e;
				}
			}
		}
	} catch (Error& e) {
		if (conflictRange.canBeSet()) {
			conflictRange.send(std::make_pair(Key(), Key()));
		}

		throw;
	}
}

Future<RangeResult> getRange(Database const& cx,
                             Future<Version> const& fVersion,
                             KeySelector const& begin,
                             KeySelector const& end,
                             GetRangeLimits const& limits,
                             bool const& reverse,
                             TransactionInfo const& info,
                             TagSet const& tags) {
	return getRange(cx,
	                Reference<TransactionLogInfo>(),
	                fVersion,
	                begin,
	                end,
	                limits,
	                Promise<std::pair<Key, Key>>(),
	                true,
	                reverse,
	                info,
	                tags);
}

bool DatabaseContext::debugUseTags = false;
const std::vector<std::string> DatabaseContext::debugTransactionTagChoices = { "a", "b", "c", "d", "e", "f", "g",
	                                                                           "h", "i", "j", "k", "l", "m", "n",
	                                                                           "o", "p", "q", "r", "s", "t" };

void debugAddTags(Transaction* tr) {
	int numTags = deterministicRandom()->randomInt(0, CLIENT_KNOBS->MAX_TAGS_PER_TRANSACTION + 1);
	for (int i = 0; i < numTags; ++i) {
		TransactionTag tag;
		if (deterministicRandom()->random01() < 0.7) {
			tag = TransactionTagRef(deterministicRandom()->randomChoice(DatabaseContext::debugTransactionTagChoices));
		} else {
			int length = deterministicRandom()->randomInt(1, CLIENT_KNOBS->MAX_TRANSACTION_TAG_LENGTH + 1);
			uint8_t* s = new (tag.arena()) uint8_t[length];
			for (int j = 0; j < length; ++j) {
				s[j] = (uint8_t)deterministicRandom()->randomInt(0, 256);
			}

			tag.contents() = TransactionTagRef(s, length);
		}

		if (deterministicRandom()->coinflip()) {
			tr->options.readTags.addTag(tag);
		}
		tr->options.tags.addTag(tag);
	}
}
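
// Note: the second half of a SpanID is randomized only when transaction tracing is enabled;
// a zero second half marks the span as untraced while the random first half still gives the
// transaction a unique identifier.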
SpanID generateSpanID(int transactionTracingEnabled) {
	uint64_t tid = deterministicRandom()->randomUInt64();
	if (transactionTracingEnabled > 0) {
		return SpanID(tid, deterministicRandom()->randomUInt64());
	} else {
		return SpanID(tid, 0);
	}
}

Transaction::Transaction()
  : info(TaskPriority::DefaultEndpoint, generateSpanID(true)), span(info.spanID, "Transaction"_loc) {}

Transaction::Transaction(Database const& cx)
  : cx(cx), info(cx->taskID, generateSpanID(cx->transactionTracingEnabled)), backoff(CLIENT_KNOBS->DEFAULT_BACKOFF),
    committedVersion(invalidVersion), versionstampPromise(Promise<Standalone<StringRef>>()), options(cx), numErrors(0),
    trLogInfo(createTrLogInfoProbabilistically(cx)), tr(info.spanID), span(info.spanID, "Transaction"_loc) {
	if (DatabaseContext::debugUseTags) {
		debugAddTags(this);
	}
}

Transaction::~Transaction() {
	flushTrLogsIfEnabled();
	cancelWatches();
}

void Transaction::operator=(Transaction&& r) noexcept {
	flushTrLogsIfEnabled();
	cx = std::move(r.cx);
	tr = std::move(r.tr);
	readVersion = std::move(r.readVersion);
	metadataVersion = std::move(r.metadataVersion);
	extraConflictRanges = std::move(r.extraConflictRanges);
	commitResult = std::move(r.commitResult);
	committing = std::move(r.committing);
	options = std::move(r.options);
	info = r.info;
	backoff = r.backoff;
	numErrors = r.numErrors;
	committedVersion = r.committedVersion;
	versionstampPromise = std::move(r.versionstampPromise);
	watches = r.watches;
	trLogInfo = std::move(r.trLogInfo);
}

void Transaction::flushTrLogsIfEnabled() {
	if (trLogInfo && trLogInfo->logsAdded && trLogInfo->trLogWriter.getData()) {
		ASSERT(trLogInfo->flushed == false);
		cx->clientStatusUpdater.inStatusQ.push_back({ trLogInfo->identifier, std::move(trLogInfo->trLogWriter) });
		trLogInfo->flushed = true;
	}
}

void Transaction::setVersion(Version v) {
	startTime = now();
	if (readVersion.isValid())
		throw read_version_already_set();
	if (v <= 0)
		throw version_invalid();
	readVersion = v;
}

Future<Optional<Value>> Transaction::get(const Key& key, bool snapshot) {
	++cx->transactionLogicalReads;
	++cx->transactionGetValueRequests;
	// ASSERT (key < allKeys.end);

	// There are no keys in the database with size greater than KEY_SIZE_LIMIT
	if (key.size() >
	    (key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
		return Optional<Value>();

	auto ver = getReadVersion();

	/* if (!systemKeys.contains(key))
	    return Optional<Value>(Value()); */

	if (!snapshot)
		tr.transaction.read_conflict_ranges.push_back(tr.arena, singleKeyRange(key, tr.arena));

	if (key == metadataVersionKey) {
		++cx->transactionMetadataVersionReads;
		if (!ver.isReady() || metadataVersion.isSet()) {
			return metadataVersion.getFuture();
		} else {
			if (ver.isError())
				return ver.getError();
			if (ver.get() == cx->metadataVersionCache[cx->mvCacheInsertLocation].first) {
				return cx->metadataVersionCache[cx->mvCacheInsertLocation].second;
			}

			Version v = ver.get();
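			// metadataVersionCache is a fixed-size circular buffer: the newest entry sits at
			// mvCacheInsertLocation and the oldest immediately after it. Binary search the
			// circular index space for an entry whose version matches exactly.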
			int hi = cx->mvCacheInsertLocation;
			int lo = (cx->mvCacheInsertLocation + 1) % cx->metadataVersionCache.size();

			while (hi != lo) {
				int cu = hi > lo ? (hi + lo) / 2
				                 : ((hi + cx->metadataVersionCache.size() + lo) / 2) % cx->metadataVersionCache.size();
				if (v == cx->metadataVersionCache[cu].first) {
					return cx->metadataVersionCache[cu].second;
				}
				if (cu == lo) {
					break;
				}
				if (v < cx->metadataVersionCache[cu].first) {
					hi = cu;
				} else {
					lo = (cu + 1) % cx->metadataVersionCache.size();
				}
			}
		}
	}

	return getValue(ver, key, cx, info, trLogInfo, options.readTags);
}

void Watch::setWatch(Future<Void> watchFuture) {
	this->watchFuture = watchFuture;

	// Cause the watch loop to go around and start waiting on watchFuture
	onSetWatchTrigger.send(Void());
}

// FIXME: This seems pretty horrible. Now a Database can't die until all of its watches do...
ACTOR Future<Void> watch(Reference<Watch> watch, Database cx, TagSet tags, TransactionInfo info) {
	try {
		choose {
			// RYOW write to value that is being watched (if applicable)
			// Errors
			when(wait(watch->onChangeTrigger.getFuture())) {}

			// NativeAPI finished commit and updated watchFuture
			when(wait(watch->onSetWatchTrigger.getFuture())) {

				loop {
					choose {
						// NativeAPI watchValue future finishes or errors
						when(wait(watch->watchFuture)) { break; }
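
						// The cluster connection file changed (e.g. a cluster switch). Watch
						// registrations on the old cluster are no longer valid, so clear the
						// cached watch metadata and re-register against the new cluster.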
						when(wait(cx->connectionFileChanged())) {
							TEST(true); // Recreated a watch after switch
							cx->clearWatchMetadata();
							watch->watchFuture =
							    watchValueMap(cx->minAcceptableReadVersion, watch->key, watch->value, cx, info, tags);
						}
					}
				}
			}
		}
	} catch (Error& e) {
		cx->removeWatch();
		throw;
	}

	cx->removeWatch();
	return Void();
}

Future<Version> Transaction::getRawReadVersion() {
	return ::getRawVersion(cx, info.spanID);
}

Future<Void> Transaction::watch(Reference<Watch> watch) {
	++cx->transactionWatchRequests;
	cx->addWatch();
	watches.push_back(watch);
	return ::watch(watch, cx, options.readTags, info);
}

ACTOR Future<Standalone<VectorRef<const char*>>> getAddressesForKeyActor(Key key,
                                                                         Future<Version> ver,
                                                                         Database cx,
                                                                         TransactionInfo info,
                                                                         TransactionOptions options) {
	state vector<StorageServerInterface> ssi;

	// If key >= allKeys.end, then getRange will return a kv-pair with an empty value. This will result in our
	// serverInterfaces vector being empty, which will cause us to return an empty addresses list.

	state Key ksKey = keyServersKey(key);
	state RangeResult serverTagResult = wait(getRange(cx,
	                                                  ver,
	                                                  lastLessOrEqual(serverTagKeys.begin),
	                                                  firstGreaterThan(serverTagKeys.end),
	                                                  GetRangeLimits(CLIENT_KNOBS->TOO_MANY),
	                                                  false,
	                                                  info,
	                                                  options.readTags));
	ASSERT(!serverTagResult.more && serverTagResult.size() < CLIENT_KNOBS->TOO_MANY);
	Future<RangeResult> futureServerUids = getRange(
	    cx, ver, lastLessOrEqual(ksKey), firstGreaterThan(ksKey), GetRangeLimits(1), false, info, options.readTags);
	RangeResult serverUids = wait(futureServerUids);

	ASSERT(serverUids.size()); // every shard needs to have a team

	vector<UID> src;
	vector<UID> ignore; // 'ignore' is so named because it is the vector into which we decode the 'dest' servers in the
	                    // case where this key is being relocated. But 'src' is the canonical location until the move is
	                    // finished, because it could be cancelled at any time.
	decodeKeyServersValue(serverTagResult, serverUids[0].value, src, ignore);
	Optional<vector<StorageServerInterface>> serverInterfaces =
	    wait(transactionalGetServerInterfaces(ver, cx, info, src, options.readTags));

	ASSERT(serverInterfaces.present()); // since this is happening transactionally, /FF/keyServers and /FF/serverList
	                                    // need to be consistent with one another
	ssi = serverInterfaces.get();

	Standalone<VectorRef<const char*>> addresses;
	for (auto i : ssi) {
		std::string ipString = options.includePort ? i.address().toString() : i.address().ip.toString();
		char* c_string = new (addresses.arena()) char[ipString.length() + 1];
		strcpy(c_string, ipString.c_str());
		addresses.push_back(addresses.arena(), c_string);
	}
	return addresses;
}

Future<Standalone<VectorRef<const char*>>> Transaction::getAddressesForKey(const Key& key) {
	++cx->transactionLogicalReads;
	++cx->transactionGetAddressesForKeyRequests;
	auto ver = getReadVersion();

	return getAddressesForKeyActor(key, ver, cx, info, options);
}

ACTOR Future<Key> getKeyAndConflictRange(Database cx,
                                         KeySelector k,
                                         Future<Version> version,
                                         Promise<std::pair<Key, Key>> conflictRange,
                                         TransactionInfo info,
                                         TagSet tags) {
	try {
		Key rep = wait(getKey(cx, k, version, info, tags));
		if (k.offset <= 0)
			conflictRange.send(std::make_pair(rep, k.orEqual ? keyAfter(k.getKey()) : Key(k.getKey(), k.arena())));
		else
			conflictRange.send(
			    std::make_pair(k.orEqual ? keyAfter(k.getKey()) : Key(k.getKey(), k.arena()), keyAfter(rep)));
		return rep;
	} catch (Error& e) {
		conflictRange.send(std::make_pair(Key(), Key()));
		throw;
	}
}

Future<Key> Transaction::getKey(const KeySelector& key, bool snapshot) {
	++cx->transactionLogicalReads;
	++cx->transactionGetKeyRequests;
	if (snapshot)
		return ::getKey(cx, key, getReadVersion(), info, options.readTags);

	Promise<std::pair<Key, Key>> conflictRange;
	extraConflictRanges.push_back(conflictRange.getFuture());
	return getKeyAndConflictRange(cx, key, getReadVersion(), conflictRange, info, options.readTags);
}

Future<RangeResult> Transaction::getRange(const KeySelector& begin,
                                          const KeySelector& end,
                                          GetRangeLimits limits,
                                          bool snapshot,
                                          bool reverse) {
	++cx->transactionLogicalReads;
	++cx->transactionGetRangeRequests;

	if (limits.isReached())
		return RangeResult();

	if (!limits.isValid())
		return range_limits_invalid();

	ASSERT(limits.rows != 0);
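
	// Normalize away orEqual on both selectors: removeOrEqual() rewrites a selector into an
	// equivalent one with orEqual == false, so the code below only handles a single form.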
	KeySelector b = begin;
	if (b.orEqual) {
		TEST(true); // Native begin orEqual==true
		b.removeOrEqual(b.arena());
	}

	KeySelector e = end;
	if (e.orEqual) {
		TEST(true); // Native end orEqual==true
		e.removeOrEqual(e.arena());
	}

	if (b.offset >= e.offset && b.getKey() >= e.getKey()) {
		TEST(true); // Native range inverted
		return RangeResult();
	}

	Promise<std::pair<Key, Key>> conflictRange;
	if (!snapshot) {
		extraConflictRanges.push_back(conflictRange.getFuture());
	}

	return ::getRange(
	    cx, trLogInfo, getReadVersion(), b, e, limits, conflictRange, snapshot, reverse, info, options.readTags);
}

Future<RangeResult> Transaction::getRange(const KeySelector& begin,
                                          const KeySelector& end,
                                          int limit,
                                          bool snapshot,
                                          bool reverse) {
	return getRange(begin, end, GetRangeLimits(limit), snapshot, reverse);
}

void Transaction::addReadConflictRange(KeyRangeRef const& keys) {
	ASSERT(!keys.empty());

	// There aren't any keys in the database with size larger than KEY_SIZE_LIMIT, so if range contains large keys
	// we can translate it to an equivalent one with smaller keys
	KeyRef begin = keys.begin;
	KeyRef end = keys.end;

	if (begin.size() >
	    (begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
		begin = begin.substr(
		    0,
		    (begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
		        1);
	if (end.size() >
	    (end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
		end = end.substr(
		    0,
		    (end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
		        1);

	KeyRangeRef r = KeyRangeRef(begin, end);

	if (r.empty()) {
		return;
	}

	tr.transaction.read_conflict_ranges.push_back_deep(tr.arena, r);
}

void Transaction::makeSelfConflicting() {
	BinaryWriter wr(Unversioned());
	wr.serializeBytes(LiteralStringRef("\xFF/SC/"));
	wr << deterministicRandom()->randomUniqueID();
	auto r = singleKeyRange(wr.toValue(), tr.arena);
	tr.transaction.read_conflict_ranges.push_back(tr.arena, r);
	tr.transaction.write_conflict_ranges.push_back(tr.arena, r);
}

void Transaction::set(const KeyRef& key, const ValueRef& value, bool addConflictRange) {
	++cx->transactionSetMutations;
	if (key.size() >
	    (key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
		throw key_too_large();
	if (value.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)
		throw value_too_large();

	auto& req = tr;
	auto& t = req.transaction;
	auto r = singleKeyRange(key, req.arena);
	auto v = ValueRef(req.arena, value);
	t.mutations.emplace_back(req.arena, MutationRef::SetValue, r.begin, v);

	if (addConflictRange) {
		t.write_conflict_ranges.push_back(req.arena, r);
	}
}

void Transaction::atomicOp(const KeyRef& key,
                           const ValueRef& operand,
                           MutationRef::Type operationType,
                           bool addConflictRange) {
	++cx->transactionAtomicMutations;
	if (key.size() >
	    (key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
		throw key_too_large();
	if (operand.size() > CLIENT_KNOBS->VALUE_SIZE_LIMIT)
		throw value_too_large();

	if (apiVersionAtLeast(510)) {
		if (operationType == MutationRef::Min)
			operationType = MutationRef::MinV2;
		else if (operationType == MutationRef::And)
			operationType = MutationRef::AndV2;
	}

	auto& req = tr;
	auto& t = req.transaction;
	auto r = singleKeyRange(key, req.arena);
	auto v = ValueRef(req.arena, operand);

	t.mutations.emplace_back(req.arena, operationType, r.begin, v);

	if (addConflictRange && operationType != MutationRef::SetVersionstampedKey)
		t.write_conflict_ranges.push_back(req.arena, r);

	TEST(true); // NativeAPI atomic operation
}

void Transaction::clear(const KeyRangeRef& range, bool addConflictRange) {
	++cx->transactionClearMutations;
	auto& req = tr;
	auto& t = req.transaction;

	KeyRef begin = range.begin;
	KeyRef end = range.end;

	// There aren't any keys in the database with size larger than KEY_SIZE_LIMIT, so if range contains large keys
	// we can translate it to an equivalent one with smaller keys
	if (begin.size() >
	    (begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
		begin = begin.substr(
		    0,
		    (begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
		        1);
	if (end.size() >
	    (end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
		end = end.substr(
		    0,
		    (end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
		        1);

	auto r = KeyRangeRef(req.arena, KeyRangeRef(begin, end));
	if (r.empty())
		return;

	t.mutations.emplace_back(req.arena, MutationRef::ClearRange, r.begin, r.end);

	if (addConflictRange)
		t.write_conflict_ranges.push_back(req.arena, r);
}

void Transaction::clear(const KeyRef& key, bool addConflictRange) {
	++cx->transactionClearMutations;
	// There aren't any keys in the database with size larger than KEY_SIZE_LIMIT
	if (key.size() >
	    (key.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
		return;

	auto& req = tr;
	auto& t = req.transaction;

	// efficient single key range clear range mutation, see singleKeyRange
	uint8_t* data = new (req.arena) uint8_t[key.size() + 1];
	memcpy(data, key.begin(), key.size());
	data[key.size()] = 0;
	t.mutations.emplace_back(
	    req.arena, MutationRef::ClearRange, KeyRef(data, key.size()), KeyRef(data, key.size() + 1));
	if (addConflictRange)
		t.write_conflict_ranges.emplace_back(req.arena, KeyRef(data, key.size()), KeyRef(data, key.size() + 1));
}

void Transaction::addWriteConflictRange(const KeyRangeRef& keys) {
	ASSERT(!keys.empty());
	auto& req = tr;
	auto& t = req.transaction;

	// There aren't any keys in the database with size larger than KEY_SIZE_LIMIT, so if range contains large keys
	// we can translate it to an equivalent one with smaller keys
	KeyRef begin = keys.begin;
	KeyRef end = keys.end;

	if (begin.size() >
	    (begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
		begin = begin.substr(
		    0,
		    (begin.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
		        1);
	if (end.size() >
	    (end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT))
		end = end.substr(
		    0,
		    (end.startsWith(systemKeys.begin) ? CLIENT_KNOBS->SYSTEM_KEY_SIZE_LIMIT : CLIENT_KNOBS->KEY_SIZE_LIMIT) +
		        1);

	KeyRangeRef r = KeyRangeRef(begin, end);

	if (r.empty()) {
		return;
	}

	t.write_conflict_ranges.push_back_deep(req.arena, r);
}
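
// Computes the delay before the next retry. Tag-throttled transactions may have their backoff
// raised to the throttle's remaining duration (capped at the recheck interval); the returned
// value is jittered by a uniform random factor, and the stored backoff grows exponentially up
// to the applicable maximum for the next call.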
double Transaction::getBackoff(int errCode) {
	double returnedBackoff = backoff;

	if (errCode == error_code_tag_throttled) {
		auto priorityItr = cx->throttledTags.find(options.priority);
		for (auto& tag : options.tags) {
			if (priorityItr != cx->throttledTags.end()) {
				auto tagItr = priorityItr->second.find(tag);
				if (tagItr != priorityItr->second.end()) {
					TEST(true); // Returning throttle backoff
					returnedBackoff = std::min(CLIENT_KNOBS->TAG_THROTTLE_RECHECK_INTERVAL,
					                           std::max(returnedBackoff, tagItr->second.throttleDuration()));
					if (returnedBackoff == CLIENT_KNOBS->TAG_THROTTLE_RECHECK_INTERVAL) {
						break;
					}
				}
			}
		}
	}

	returnedBackoff *= deterministicRandom()->random01();

	// Set backoff for next time
	if (errCode == error_code_proxy_memory_limit_exceeded) {
		backoff = std::min(backoff * CLIENT_KNOBS->BACKOFF_GROWTH_RATE, CLIENT_KNOBS->RESOURCE_CONSTRAINED_MAX_BACKOFF);
	} else {
		backoff = std::min(backoff * CLIENT_KNOBS->BACKOFF_GROWTH_RATE, options.maxBackoff);
	}

	return returnedBackoff;
}

TransactionOptions::TransactionOptions(Database const& cx) {
	reset(cx);
	if (BUGGIFY) {
		commitOnFirstProxy = true;
	}
}

void TransactionOptions::clear() {
	maxBackoff = CLIENT_KNOBS->DEFAULT_MAX_BACKOFF;
	getReadVersionFlags = 0;
	sizeLimit = CLIENT_KNOBS->TRANSACTION_SIZE_LIMIT;
	maxTransactionLoggingFieldLength = 0;
	checkWritesEnabled = false;
	causalWriteRisky = false;
	commitOnFirstProxy = false;
	debugDump = false;
	lockAware = false;
	readOnly = false;
	firstInBatch = false;
	includePort = false;
	reportConflictingKeys = false;
	tags = TagSet{};
	readTags = TagSet{};
	priority = TransactionPriority::DEFAULT;
	expensiveClearCostEstimation = false;
}

TransactionOptions::TransactionOptions() {
	clear();
}

void TransactionOptions::reset(Database const& cx) {
	clear();
	lockAware = cx->lockAware;
	if (cx->apiVersionAtLeast(630)) {
		includePort = true;
	}
}

void Transaction::reset() {
	tr = CommitTransactionRequest();
	readVersion = Future<Version>();
	metadataVersion = Promise<Optional<Key>>();
	extraConflictRanges.clear();
	versionstampPromise = Promise<Standalone<StringRef>>();
	commitResult = Promise<Void>();
	committing = Future<Void>();
	info.taskID = cx->taskID;
	info.debugID = Optional<UID>();
	flushTrLogsIfEnabled();
	trLogInfo = Reference<TransactionLogInfo>(createTrLogInfoProbabilistically(cx));
	cancelWatches();

	if (apiVersionAtLeast(16)) {
		options.reset(cx);
	}
}

void Transaction::fullReset() {
	reset();
	span = Span(span.location);
	info.spanID = span.context;
	backoff = CLIENT_KNOBS->DEFAULT_BACKOFF;
}

int Transaction::apiVersionAtLeast(int minVersion) const {
	return cx->apiVersionAtLeast(minVersion);
}

class MutationBlock {
public:
	bool mutated;
	bool cleared;
	ValueRef setValue;

	MutationBlock() : mutated(false) {}
	MutationBlock(bool _cleared) : mutated(true), cleared(_cleared) {}
	MutationBlock(ValueRef value) : mutated(true), cleared(false), setValue(value) {}
};

bool compareBegin(KeyRangeRef lhs, KeyRangeRef rhs) {
	return lhs.begin < rhs.begin;
}

// If there is any intersection between the two given sets of ranges, returns a range that
// falls within the intersection
Optional<KeyRangeRef> intersects(VectorRef<KeyRangeRef> lhs, VectorRef<KeyRangeRef> rhs) {
	if (lhs.size() && rhs.size()) {
		std::sort(lhs.begin(), lhs.end(), compareBegin);
		std::sort(rhs.begin(), rhs.end(), compareBegin);
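
		// Two-pointer sweep over the sorted range lists: advance whichever side ends first;
		// the first pair of ranges that is not disjoint yields an intersection via operator&.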
		int l = 0, r = 0;
		while (l < lhs.size() && r < rhs.size()) {
			if (lhs[l].end <= rhs[r].begin)
				l++;
			else if (rhs[r].end <= lhs[l].begin)
				r++;
			else
				return lhs[l] & rhs[r];
		}
	}

	return Optional<KeyRangeRef>();
}
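
// Debugging aid (see the checkWritesEnabled transaction option): after a commit succeeds,
// re-read every mutated range at the committed version and verify that sets and clears are
// actually visible.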
ACTOR void checkWrites(Database cx,
                       Future<Void> committed,
                       Promise<Void> outCommitted,
                       CommitTransactionRequest req,
                       Transaction* checkTr) {
	state Version version;
	try {
		wait(committed);
		// If the commit is successful, by definition the transaction still exists for now. Grab the version, and don't
		// use it again.
		version = checkTr->getCommittedVersion();
		outCommitted.send(Void());
	} catch (Error& e) {
		outCommitted.sendError(e);
		return;
	}

	wait(delay(deterministicRandom()->random01())); // delay between 0 and 1 seconds

	// Future<Optional<Version>> version, Database cx, CommitTransactionRequest req ) {
	state KeyRangeMap<MutationBlock> expectedValues;

	auto& mutations = req.transaction.mutations;
	state int mCount = mutations.size(); // debugging info for traceEvent

	for (int idx = 0; idx < mutations.size(); idx++) {
		if (mutations[idx].type == MutationRef::SetValue)
			expectedValues.insert(singleKeyRange(mutations[idx].param1), MutationBlock(mutations[idx].param2));
		else if (mutations[idx].type == MutationRef::ClearRange)
			expectedValues.insert(KeyRangeRef(mutations[idx].param1, mutations[idx].param2), MutationBlock(true));
	}

	try {
		state Transaction tr(cx);
		tr.setVersion(version);
		state int checkedRanges = 0;
		state KeyRangeMap<MutationBlock>::Ranges ranges = expectedValues.ranges();
		state KeyRangeMap<MutationBlock>::iterator it = ranges.begin();
		for (; it != ranges.end(); ++it) {
			state MutationBlock m = it->value();
			if (m.mutated) {
				checkedRanges++;
				if (m.cleared) {
					RangeResult shouldBeEmpty = wait(tr.getRange(it->range(), 1));
					if (shouldBeEmpty.size()) {
						TraceEvent(SevError, "CheckWritesFailed")
						    .detail("Class", "Clear")
						    .detail("KeyBegin", it->range().begin)
						    .detail("KeyEnd", it->range().end);
						return;
					}
				} else {
					Optional<Value> val = wait(tr.get(it->range().begin));
					if (!val.present() || val.get() != m.setValue) {
						TraceEvent evt(SevError, "CheckWritesFailed");
						evt.detail("Class", "Set").detail("Key", it->range().begin).detail("Expected", m.setValue);
						if (!val.present())
							evt.detail("Actual", "_Value Missing_");
						else
							evt.detail("Actual", val.get());
						return;
					}
				}
			}
		}
		TraceEvent("CheckWritesSuccess")
		    .detail("Version", version)
		    .detail("MutationCount", mCount)
		    .detail("CheckedRanges", checkedRanges);
	} catch (Error& e) {
		bool ok = e.code() == error_code_transaction_too_old || e.code() == error_code_future_version;
		TraceEvent(ok ? SevWarn : SevError, "CheckWritesFailed").error(e);
		throw;
	}
}
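
// Used to fence an in-doubt commit: retry an otherwise-empty transaction that conflicts with
// the given range until it commits. Once it has committed, the original commit can no longer
// be in flight, so its outcome can be determined safely.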
ACTOR static Future<Void> commitDummyTransaction(Database cx,
                                                 KeyRange range,
                                                 TransactionInfo info,
                                                 TransactionOptions options) {
	state Transaction tr(cx);
	state int retries = 0;
	state Span span("NAPI:dummyTransaction"_loc, info.spanID);
	tr.span.addParent(span.context);
	loop {
		try {
			TraceEvent("CommitDummyTransaction").detail("Key", range.begin).detail("Retries", retries);
			tr.options = options;
			tr.info.taskID = info.taskID;
			tr.setOption(FDBTransactionOptions::ACCESS_SYSTEM_KEYS);
			tr.setOption(FDBTransactionOptions::CAUSAL_WRITE_RISKY);
			tr.setOption(FDBTransactionOptions::LOCK_AWARE);
			tr.addReadConflictRange(range);
			tr.addWriteConflictRange(range);
			wait(tr.commit());
			return Void();
		} catch (Error& e) {
			TraceEvent("CommitDummyTransactionError")
			    .error(e, true)
			    .detail("Key", range.begin)
			    .detail("Retries", retries);
			wait(tr.onError(e));
		}
		++retries;
	}
}

void Transaction::cancelWatches(Error const& e) {
	for (int i = 0; i < watches.size(); ++i)
		if (!watches[i]->onChangeTrigger.isSet())
			watches[i]->onChangeTrigger.sendError(e);

	watches.clear();
}

void Transaction::setupWatches() {
	try {
		Future<Version> watchVersion = getCommittedVersion() > 0 ? getCommittedVersion() : getReadVersion();

		for (int i = 0; i < watches.size(); ++i)
			watches[i]->setWatch(
			    watchValueMap(watchVersion, watches[i]->key, watches[i]->value, cx, info, options.readTags));

		watches.clear();
	} catch (Error&) {
		ASSERT(false); // The above code must NOT throw because commit has already occurred.
		throw internal_error();
	}
}

ACTOR Future<Optional<ClientTrCommitCostEstimation>> estimateCommitCosts(Transaction* self,
                                                                         CommitTransactionRef const* transaction) {
	state ClientTrCommitCostEstimation trCommitCosts;
	state KeyRangeRef keyRange;
	state int i = 0;

	for (; i < transaction->mutations.size(); ++i) {
		auto* it = &transaction->mutations[i];

		if (it->type == MutationRef::Type::SetValue || it->isAtomicOp()) {
			trCommitCosts.opsCount++;
			trCommitCosts.writeCosts += getWriteOperationCost(it->expectedSize());
		} else if (it->type == MutationRef::Type::ClearRange) {
			trCommitCosts.opsCount++;
			keyRange = KeyRangeRef(it->param1, it->param2);
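			// A clear's cost depends on how much data it removes. Either ask the storage
			// servers for the range's metrics (accurate, but an extra round trip per clear),
			// or approximate from the number of shards the range spans.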
			if (self->options.expensiveClearCostEstimation) {
				StorageMetrics m = wait(self->getStorageMetrics(keyRange, CLIENT_KNOBS->TOO_MANY));
				trCommitCosts.clearIdxCosts.emplace_back(i, getWriteOperationCost(m.bytes));
				trCommitCosts.writeCosts += getWriteOperationCost(m.bytes);
				++trCommitCosts.expensiveCostEstCount;
				++self->getDatabase()->transactionsExpensiveClearCostEstCount;
			} else {
				std::vector<std::pair<KeyRange, Reference<LocationInfo>>> locations =
				    wait(getKeyRangeLocations(self->getDatabase(),
				                              keyRange,
				                              CLIENT_KNOBS->TOO_MANY,
				                              false,
				                              &StorageServerInterface::getShardState,
				                              self->info));
				if (locations.empty())
					continue;

				uint64_t bytes = 0;
				if (locations.size() == 1) {
					bytes = CLIENT_KNOBS->INCOMPLETE_SHARD_PLUS;
				} else { // small clear on the boundary will hit two shards but be much smaller than the shard size
					bytes = CLIENT_KNOBS->INCOMPLETE_SHARD_PLUS * 2 +
					        (locations.size() - 2) * (int64_t)self->getDatabase()->smoothMidShardSize.smoothTotal();
				}

				trCommitCosts.clearIdxCosts.emplace_back(i, getWriteOperationCost(bytes));
				trCommitCosts.writeCosts += getWriteOperationCost(bytes);
			}
		}
	}

	// sample on written bytes
	if (!self->getDatabase()->sampleOnCost(trCommitCosts.writeCosts))
		return Optional<ClientTrCommitCostEstimation>();

	// Sample clear ops: in expectation, one op is sampled per COMMIT_SAMPLE_COST of write cost.
	// Mutations cheaper than COMMIT_SAMPLE_COST have their sampled cost scaled up:
	// scaledCost = max(cost, COMMIT_SAMPLE_COST). For example, with four transactions:
	//   A - 100 1-cost mutations:   E[sampled ops] = 1, E[sampled cost] = 100
	//   B - 1 100-cost mutation:    E[sampled ops] = 1, E[sampled cost] = 100
	//   C - 50 2-cost mutations:    E[sampled ops] = 1, E[sampled cost] = 100
	//   D - 1 150-cost mutation and 150 1-cost mutations:
	//       E[sampled ops] = 3, E[sampled cost] = 150cost * 1 + 150 * 100cost * 0.01 = 300
	ASSERT(trCommitCosts.writeCosts > 0);
	std::deque<std::pair<int, uint64_t>> newClearIdxCosts;
	for (const auto& [idx, cost] : trCommitCosts.clearIdxCosts) {
		if (trCommitCosts.writeCosts >= CLIENT_KNOBS->COMMIT_SAMPLE_COST) {
			double mul = trCommitCosts.writeCosts / std::max(1.0, (double)CLIENT_KNOBS->COMMIT_SAMPLE_COST);
			if (deterministicRandom()->random01() < cost * mul / trCommitCosts.writeCosts) {
				newClearIdxCosts.emplace_back(
				    idx, cost < CLIENT_KNOBS->COMMIT_SAMPLE_COST ? CLIENT_KNOBS->COMMIT_SAMPLE_COST : cost);
			}
		} else if (deterministicRandom()->random01() < (double)cost / trCommitCosts.writeCosts) {
			newClearIdxCosts.emplace_back(
			    idx, cost < CLIENT_KNOBS->COMMIT_SAMPLE_COST ? CLIENT_KNOBS->COMMIT_SAMPLE_COST : cost);
		}
	}

	trCommitCosts.clearIdxCosts.swap(newClearIdxCosts);
	return trCommitCosts;
}

ACTOR static Future<Void> tryCommit(Database cx,
                                    Reference<TransactionLogInfo> trLogInfo,
                                    CommitTransactionRequest req,
                                    Future<Version> readVersion,
                                    TransactionInfo info,
                                    Version* pCommittedVersion,
                                    Transaction* tr,
                                    TransactionOptions options) {
	state TraceInterval interval("TransactionCommit");
	state double startTime = now();
	state Span span("NAPI:tryCommit"_loc, info.spanID);
	req.spanContext = span.context;
	if (info.debugID.present())
		TraceEvent(interval.begin()).detail("Parent", info.debugID.get());
	try {
		if (CLIENT_BUGGIFY) {
			throw deterministicRandom()->randomChoice(std::vector<Error>{
			    not_committed(), transaction_too_old(), proxy_memory_limit_exceeded(), commit_unknown_result() });
		}

		if (req.tagSet.present() && tr->options.priority < TransactionPriority::IMMEDIATE) {
			wait(store(req.transaction.read_snapshot, readVersion) &&
			     store(req.commitCostEstimation, estimateCommitCosts(tr, &req.transaction)));
		} else {
			wait(store(req.transaction.read_snapshot, readVersion));
		}

		startTime = now();
		state Optional<UID> commitID = Optional<UID>();
		if (info.debugID.present()) {
			commitID = nondeterministicRandom()->randomUniqueID();
			g_traceBatch.addAttach("CommitAttachID", info.debugID.get().first(), commitID.get().first());
			g_traceBatch.addEvent("CommitDebug", commitID.get().first(), "NativeAPI.commit.Before");
		}

		req.debugID = commitID;
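
		// COMMIT_ON_FIRST_PROXY pins this commit to the first commit proxy instead of load
		// balancing across proxies, giving a single serialization point for commits that must
		// be ordered relative to one another (e.g. backup's dumpData stream), which ordinary
		// proxy load balancing cannot guarantee.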
		state Future<CommitID> reply;
		if (options.commitOnFirstProxy) {
			if (cx->clientInfo->get().firstCommitProxy.present()) {
				reply = throwErrorOr(brokenPromiseToMaybeDelivered(
				    cx->clientInfo->get().firstCommitProxy.get().commit.tryGetReply(req)));
			} else {
				const std::vector<CommitProxyInterface>& proxies = cx->clientInfo->get().commitProxies;
				reply = proxies.size() ? throwErrorOr(brokenPromiseToMaybeDelivered(proxies[0].commit.tryGetReply(req)))
				                       : Never();
			}
|
        } else {
            reply = basicLoadBalance(cx->getCommitProxies(info.useProvisionalProxies),
                                     &CommitProxyInterface::commit,
                                     req,
                                     TaskPriority::DefaultPromiseEndpoint,
                                     true);
        }

        choose {
            when(wait(cx->onProxiesChanged())) {
                reply.cancel();
                throw request_maybe_delivered();
            }
            when(CommitID ci = wait(reply)) {
                Version v = ci.version;
                if (v != invalidVersion) {
                    if (CLIENT_BUGGIFY) {
                        throw commit_unknown_result();
                    }
                    if (info.debugID.present())
                        TraceEvent(interval.end()).detail("CommittedVersion", v);
                    *pCommittedVersion = v;
                    if (v > cx->metadataVersionCache[cx->mvCacheInsertLocation].first) {
                        cx->mvCacheInsertLocation = (cx->mvCacheInsertLocation + 1) % cx->metadataVersionCache.size();
                        cx->metadataVersionCache[cx->mvCacheInsertLocation] = std::make_pair(v, ci.metadataVersion);
                    }

                    Standalone<StringRef> ret = makeString(10);
                    placeVersionstamp(mutateString(ret), v, ci.txnBatchId);
                    tr->versionstampPromise.send(ret);

                    tr->numErrors = 0;
                    ++cx->transactionsCommitCompleted;
                    cx->transactionCommittedMutations += req.transaction.mutations.size();
                    cx->transactionCommittedMutationBytes += req.transaction.mutations.expectedSize();

                    if (info.debugID.present())
                        g_traceBatch.addEvent("CommitDebug", commitID.get().first(), "NativeAPI.commit.After");

                    double latency = now() - startTime;
                    cx->commitLatencies.addSample(latency);
                    cx->latencies.addSample(now() - tr->startTime);
                    if (trLogInfo)
                        trLogInfo->addLog(FdbClientLogEvents::EventCommit_V2(startTime,
                                                                             cx->clientLocality.dcId(),
                                                                             latency,
                                                                             req.transaction.mutations.size(),
                                                                             req.transaction.mutations.expectedSize(),
                                                                             ci.version,
                                                                             req));
                    return Void();
                } else {
                    // clear the RYW transaction which contains previous conflicting keys
                    tr->info.conflictingKeys.reset();
                    if (ci.conflictingKRIndices.present()) {
                        tr->info.conflictingKeys =
                            std::make_shared<CoalescedKeyRangeMap<Value>>(conflictingKeysFalse, specialKeys.end);
                        state Standalone<VectorRef<int>> conflictingKRIndices = ci.conflictingKRIndices.get();
                        // drop duplicate indices and merge overlapped ranges
                        // Note: addReadConflictRange in native transaction object does not merge overlapped ranges
                        state std::unordered_set<int> mergedIds(conflictingKRIndices.begin(),
                                                                conflictingKRIndices.end());
                        for (auto const& rCRIndex : mergedIds) {
                            const KeyRangeRef kr = req.transaction.read_conflict_ranges[rCRIndex];
                            const KeyRange krWithPrefix = KeyRangeRef(kr.begin.withPrefix(conflictingKeysRange.begin),
                                                                      kr.end.withPrefix(conflictingKeysRange.begin));
                            tr->info.conflictingKeys->insert(krWithPrefix, conflictingKeysTrue);
                        }
                    }

                    if (info.debugID.present())
                        TraceEvent(interval.end()).detail("Conflict", 1);

                    if (info.debugID.present())
                        g_traceBatch.addEvent("CommitDebug", commitID.get().first(), "NativeAPI.commit.After");

                    throw not_committed();
                }
            }
        }
    } catch (Error& e) {
        if (e.code() == error_code_request_maybe_delivered || e.code() == error_code_commit_unknown_result) {
            // We don't know if the commit happened, and it might even still be in flight.
            if (!options.causalWriteRisky) {
                // Make sure it's not still in flight, either by ensuring the master we submitted to is dead, or the
                // version we submitted with is dead, or by committing a conflicting transaction successfully
                // if ( cx->getCommitProxies()->masterGeneration <= originalMasterGeneration )

                // To ensure the original request is not in flight, we need a key range which intersects its read
                // conflict ranges. We pick a key range which also intersects its write conflict ranges, since that
                // avoids potentially creating conflicts where there otherwise would be none. We make the range as
                // small as possible (a single key range) to minimize conflicts. The intersection will never be
                // empty, because if it were (since !causalWriteRisky) makeSelfConflicting would have been applied
                // automatically to req.
                KeyRangeRef selfConflictingRange =
                    intersects(req.transaction.write_conflict_ranges, req.transaction.read_conflict_ranges).get();

                TEST(true); // Waiting for dummy transaction to report commit_unknown_result

                wait(commitDummyTransaction(cx, singleKeyRange(selfConflictingRange.begin), info, tr->options));
            }

            // The user needs to be informed that we aren't sure whether the commit happened. Standard retry loops
            // retry it anyway (relying on transaction idempotence) but a client might do something else.
            throw commit_unknown_result();
        } else {
            if (e.code() != error_code_transaction_too_old && e.code() != error_code_not_committed &&
                e.code() != error_code_database_locked && e.code() != error_code_proxy_memory_limit_exceeded &&
                e.code() != error_code_batch_transaction_throttled && e.code() != error_code_tag_throttled) {
                TraceEvent(SevError, "TryCommitError").error(e);
            }
            if (trLogInfo)
                trLogInfo->addLog(FdbClientLogEvents::EventCommitError(
                    startTime, cx->clientLocality.dcId(), static_cast<int>(e.code()), req));
            throw;
        }
    }
}

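// Validates and finalizes the commit request (read-only fast path, size limits,
// self-conflicting ranges when !causalWriteRisky, option flags) and hands it to
// tryCommit above. Optionally routes through checkWrites for randomized write
// verification when CHECK_WRITES_ENABLE is set.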
Future<Void> Transaction::commitMutations() {
    try {
        // if this is a read-only transaction return immediately
        if (!tr.transaction.write_conflict_ranges.size() && !tr.transaction.mutations.size()) {
            numErrors = 0;

            committedVersion = invalidVersion;
            versionstampPromise.sendError(no_commit_version());
            return Void();
        }

        ++cx->transactionsCommitStarted;

        if (options.readOnly)
            return transaction_read_only();

        cx->mutationsPerCommit.addSample(tr.transaction.mutations.size());
        cx->bytesPerCommit.addSample(tr.transaction.mutations.expectedSize());
        if (options.tags.size())
            tr.tagSet = options.tags;

        size_t transactionSize = getSize();
        if (transactionSize > (uint64_t)FLOW_KNOBS->PACKET_WARNING) {
            TraceEvent(!g_network->isSimulated() ? SevWarnAlways : SevWarn, "LargeTransaction")
                .suppressFor(1.0)
                .detail("Size", transactionSize)
                .detail("NumMutations", tr.transaction.mutations.size())
                .detail("ReadConflictSize", tr.transaction.read_conflict_ranges.expectedSize())
                .detail("WriteConflictSize", tr.transaction.write_conflict_ranges.expectedSize())
                .detail("DebugIdentifier", trLogInfo ? trLogInfo->identifier : "");
        }

        if (!apiVersionAtLeast(300)) {
            transactionSize =
                tr.transaction.mutations.expectedSize(); // Old API versions didn't account for conflict ranges when
                                                         // determining whether to throw transaction_too_large
        }

        if (transactionSize > options.sizeLimit) {
            return transaction_too_large();
        }

        if (!readVersion.isValid())
            getReadVersion(
                GetReadVersionRequest::FLAG_CAUSAL_READ_RISKY); // sets up readVersion field. We had no reads, so no
                                                                // need for (expensive) full causal consistency.

        bool isCheckingWrites = options.checkWritesEnabled && deterministicRandom()->random01() < 0.01;
        for (int i = 0; i < extraConflictRanges.size(); i++)
            if (extraConflictRanges[i].isReady() &&
                extraConflictRanges[i].get().first < extraConflictRanges[i].get().second)
                tr.transaction.read_conflict_ranges.emplace_back(
                    tr.arena, extraConflictRanges[i].get().first, extraConflictRanges[i].get().second);

        if (!options.causalWriteRisky &&
            !intersects(tr.transaction.write_conflict_ranges, tr.transaction.read_conflict_ranges).present())
            makeSelfConflicting();

        if (isCheckingWrites) {
            // add all writes into the read conflict range...
            tr.transaction.read_conflict_ranges.append(
                tr.arena, tr.transaction.write_conflict_ranges.begin(), tr.transaction.write_conflict_ranges.size());
        }

        if (options.debugDump) {
            UID u = nondeterministicRandom()->randomUniqueID();
            TraceEvent("TransactionDump", u);
            for (auto i = tr.transaction.mutations.begin(); i != tr.transaction.mutations.end(); ++i)
                TraceEvent("TransactionMutation", u)
                    .detail("T", i->type)
                    .detail("P1", i->param1)
                    .detail("P2", i->param2);
        }

        if (options.lockAware) {
            tr.flags = tr.flags | CommitTransactionRequest::FLAG_IS_LOCK_AWARE;
        }
        if (options.firstInBatch) {
            tr.flags = tr.flags | CommitTransactionRequest::FLAG_FIRST_IN_BATCH;
        }
        if (options.reportConflictingKeys) {
            tr.transaction.report_conflicting_keys = true;
        }

        Future<Void> commitResult =
            tryCommit(cx, trLogInfo, tr, readVersion, info, &this->committedVersion, this, options);

        if (isCheckingWrites) {
            Promise<Void> committed;
            checkWrites(cx, commitResult, committed, tr, this);
            return committed.getFuture();
        }
        return commitResult;
    } catch (Error& e) {
        TraceEvent("ClientCommitError").error(e);
        return Future<Void>(e);
    } catch (...) {
        Error e(error_code_unknown_error);
        TraceEvent("ClientCommitError").error(e);
        return Future<Void>(e);
    }
}

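// Commits via commitMutations and, on success, arms any watches registered on
// this transaction before resetting it. On any failure other than cancellation,
// watches are cancelled and the versionstamp promise is failed.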
ACTOR Future<Void> commitAndWatch(Transaction* self) {
    try {
        wait(self->commitMutations());

        if (!self->watches.empty()) {
            self->setupWatches();
        }

        self->reset();
        return Void();
    } catch (Error& e) {
        if (e.code() != error_code_actor_cancelled) {
            if (!self->watches.empty()) {
                self->cancelWatches(e);
            }

            self->versionstampPromise.sendError(transaction_invalid_version());
            self->reset();
        }

        throw;
    }
}

Future<Void> Transaction::commit() {
    ASSERT(!committing.isValid());
    committing = commitAndWatch(this);
    return committing;
}

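// Applies a client-selected transaction option. Flag-like options carry no value
// (validateOptionValue(value, false)); valued options are validated and decoded
// with extractIntOption or read as a StringRef. E.g., a minimal sketch of
// hypothetical caller code:
//
//     tr.setOption(FDBTransactionOptions::LOCK_AWARE);
//     tr.setOption(FDBTransactionOptions::TAG, LiteralStringRef("my_tag"));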
void Transaction::setOption(FDBTransactionOptions::Option option, Optional<StringRef> value) {
    switch (option) {
    case FDBTransactionOptions::INITIALIZE_NEW_DATABASE:
        validateOptionValue(value, false);
        if (readVersion.isValid())
            throw read_version_already_set();
        readVersion = Version(0);
        options.causalWriteRisky = true;
        break;

    case FDBTransactionOptions::CAUSAL_READ_RISKY:
        validateOptionValue(value, false);
        options.getReadVersionFlags |= GetReadVersionRequest::FLAG_CAUSAL_READ_RISKY;
        break;

    case FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE:
        validateOptionValue(value, false);
        options.priority = TransactionPriority::IMMEDIATE;
        break;

    case FDBTransactionOptions::PRIORITY_BATCH:
        validateOptionValue(value, false);
        options.priority = TransactionPriority::BATCH;
        break;

    case FDBTransactionOptions::CAUSAL_WRITE_RISKY:
        validateOptionValue(value, false);
        options.causalWriteRisky = true;
        break;

    case FDBTransactionOptions::COMMIT_ON_FIRST_PROXY:
        validateOptionValue(value, false);
        options.commitOnFirstProxy = true;
        break;

    case FDBTransactionOptions::CHECK_WRITES_ENABLE:
        validateOptionValue(value, false);
        options.checkWritesEnabled = true;
        break;

    case FDBTransactionOptions::DEBUG_DUMP:
        validateOptionValue(value, false);
        options.debugDump = true;
        break;

    case FDBTransactionOptions::TRANSACTION_LOGGING_ENABLE:
        setOption(FDBTransactionOptions::DEBUG_TRANSACTION_IDENTIFIER, value);
        setOption(FDBTransactionOptions::LOG_TRANSACTION);
        break;

    case FDBTransactionOptions::DEBUG_TRANSACTION_IDENTIFIER:
        validateOptionValue(value, true);

        if (value.get().size() > 100 || value.get().size() == 0) {
            throw invalid_option_value();
        }

        if (trLogInfo) {
            if (trLogInfo->identifier.empty()) {
                trLogInfo->identifier = value.get().printable();
            } else if (trLogInfo->identifier != value.get().printable()) {
                TraceEvent(SevWarn, "CannotChangeDebugTransactionIdentifier")
                    .detail("PreviousIdentifier", trLogInfo->identifier)
                    .detail("NewIdentifier", value.get());
                throw client_invalid_operation();
            }
        } else {
            trLogInfo = makeReference<TransactionLogInfo>(value.get().printable(), TransactionLogInfo::DONT_LOG);
            trLogInfo->maxFieldLength = options.maxTransactionLoggingFieldLength;
        }
        if (info.debugID.present()) {
            TraceEvent(SevInfo, "TransactionBeingTraced")
                .detail("DebugTransactionID", trLogInfo->identifier)
                .detail("ServerTraceID", info.debugID.get());
        }
        break;

    case FDBTransactionOptions::LOG_TRANSACTION:
        validateOptionValue(value, false);
        if (trLogInfo && !trLogInfo->identifier.empty()) {
            trLogInfo->logTo(TransactionLogInfo::TRACE_LOG);
        } else {
            TraceEvent(SevWarn, "DebugTransactionIdentifierNotSet")
                .detail("Error", "Debug Transaction Identifier option must be set before logging the transaction");
            throw client_invalid_operation();
        }
        break;

    case FDBTransactionOptions::TRANSACTION_LOGGING_MAX_FIELD_LENGTH:
        validateOptionValue(value, true);
        {
            int maxFieldLength = extractIntOption(value, -1, std::numeric_limits<int32_t>::max());
            if (maxFieldLength == 0) {
                throw invalid_option_value();
            }
            options.maxTransactionLoggingFieldLength = maxFieldLength;
        }
        if (trLogInfo) {
            trLogInfo->maxFieldLength = options.maxTransactionLoggingFieldLength;
        }
        break;

    case FDBTransactionOptions::SERVER_REQUEST_TRACING:
        validateOptionValue(value, false);
        debugTransaction(deterministicRandom()->randomUniqueID());
        if (trLogInfo && !trLogInfo->identifier.empty()) {
            TraceEvent(SevInfo, "TransactionBeingTraced")
                .detail("DebugTransactionID", trLogInfo->identifier)
                .detail("ServerTraceID", info.debugID.get());
        }
        break;

    case FDBTransactionOptions::MAX_RETRY_DELAY:
        validateOptionValue(value, true);
        options.maxBackoff = extractIntOption(value, 0, std::numeric_limits<int32_t>::max()) / 1000.0;
        break;

    case FDBTransactionOptions::SIZE_LIMIT:
        validateOptionValue(value, true);
        options.sizeLimit = extractIntOption(value, 32, CLIENT_KNOBS->TRANSACTION_SIZE_LIMIT);
        break;

    case FDBTransactionOptions::LOCK_AWARE:
        validateOptionValue(value, false);
        options.lockAware = true;
        options.readOnly = false;
        break;

    case FDBTransactionOptions::READ_LOCK_AWARE:
        validateOptionValue(value, false);
        if (!options.lockAware) {
            options.lockAware = true;
            options.readOnly = true;
        }
        break;

    case FDBTransactionOptions::FIRST_IN_BATCH:
        validateOptionValue(value, false);
        options.firstInBatch = true;
        break;

    case FDBTransactionOptions::USE_PROVISIONAL_PROXIES:
        validateOptionValue(value, false);
        options.getReadVersionFlags |= GetReadVersionRequest::FLAG_USE_PROVISIONAL_PROXIES;
        info.useProvisionalProxies = true;
        break;

    case FDBTransactionOptions::INCLUDE_PORT_IN_ADDRESS:
        validateOptionValue(value, false);
        options.includePort = true;
        break;

    case FDBTransactionOptions::TAG:
        validateOptionValue(value, true);
        options.tags.addTag(value.get());
        break;

    case FDBTransactionOptions::AUTO_THROTTLE_TAG:
        validateOptionValue(value, true);
        options.tags.addTag(value.get());
        options.readTags.addTag(value.get());
        break;

    case FDBTransactionOptions::SPAN_PARENT:
        validateOptionValue(value, true);
        if (value.get().size() != 16) {
            throw invalid_option_value();
        }
        span.addParent(BinaryReader::fromStringRef<UID>(value.get(), Unversioned()));
        break;

    case FDBTransactionOptions::REPORT_CONFLICTING_KEYS:
        validateOptionValue(value, false);
        options.reportConflictingKeys = true;
        break;

    case FDBTransactionOptions::EXPENSIVE_CLEAR_COST_ESTIMATION_ENABLE:
        validateOptionValue(value, false);
        options.expensiveClearCostEstimation = true;
        break;

    default:
        break;
    }
}

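// Issues one batched GRV request (covering transactionCount client transactions)
// to a GRV proxy, retrying when the proxy set changes. Tag throttle feedback in
// the reply is folded into the client-side throttle map before returning.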
ACTOR Future<GetReadVersionReply> getConsistentReadVersion(SpanID parentSpan,
                                                           DatabaseContext* cx,
                                                           uint32_t transactionCount,
                                                           TransactionPriority priority,
                                                           uint32_t flags,
                                                           TransactionTagMap<uint32_t> tags,
                                                           Optional<UID> debugID) {
    state Span span("NAPI:getConsistentReadVersion"_loc, parentSpan);

    ++cx->transactionReadVersionBatches;
    if (debugID.present())
        g_traceBatch.addEvent("TransactionDebug", debugID.get().first(), "NativeAPI.getConsistentReadVersion.Before");
    loop {
        try {
            state GetReadVersionRequest req(span.context, transactionCount, priority, flags, tags, debugID);

            choose {
                when(wait(cx->onProxiesChanged())) {}
                when(GetReadVersionReply v = wait(basicLoadBalance(
                         cx->getGrvProxies(flags & GetReadVersionRequest::FLAG_USE_PROVISIONAL_PROXIES),
                         &GrvProxyInterface::getConsistentReadVersion,
                         req,
                         cx->taskID))) {
                    if (tags.size() != 0) {
                        auto& priorityThrottledTags = cx->throttledTags[priority];
                        for (auto& tag : tags) {
                            auto itr = v.tagThrottleInfo.find(tag.first);
                            if (itr == v.tagThrottleInfo.end()) {
                                TEST(true); // Removing client throttle
                                priorityThrottledTags.erase(tag.first);
                            } else {
                                TEST(true); // Setting client throttle
                                auto result = priorityThrottledTags.try_emplace(tag.first, itr->second);
                                if (!result.second) {
                                    result.first->second.update(itr->second);
                                }
                            }
                        }
                    }

                    if (debugID.present())
                        g_traceBatch.addEvent(
                            "TransactionDebug", debugID.get().first(), "NativeAPI.getConsistentReadVersion.After");
                    ASSERT(v.version > 0);
                    cx->minAcceptableReadVersion = std::min(cx->minAcceptableReadVersion, v.version);
                    return v;
                }
            }
        } catch (Error& e) {
            if (e.code() != error_code_broken_promise && e.code() != error_code_batch_transaction_throttled)
                TraceEvent(SevError, "GetConsistentReadVersionError").error(e);
            if (e.code() == error_code_batch_transaction_throttled && !cx->apiVersionAtLeast(630)) {
                wait(delayJittered(5.0));
            } else {
                throw;
            }
        }
    }
}

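// Aggregates VersionRequests from all transactions that share a (priority, flags)
// batcher and issues one getConsistentReadVersion call per batch. A batch is
// flushed when MAX_BATCH_SIZE requests accumulate or a dynamic timeout fires; the
// timeout is an exponential moving average tracking half the observed GRV reply
// latency, capped at GRV_BATCH_TIMEOUT. For example, starting from batchTime = 0,
// a first reply latency of 20ms yields batchTime = 0.1 * (0.5 * 20ms) = 1ms.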
ACTOR Future<Void> readVersionBatcher(DatabaseContext* cx,
                                      FutureStream<DatabaseContext::VersionRequest> versionStream,
                                      TransactionPriority priority,
                                      uint32_t flags) {
    state std::vector<Promise<GetReadVersionReply>> requests;
    state PromiseStream<Future<Void>> addActor;
    state Future<Void> collection = actorCollection(addActor.getFuture());
    state Future<Void> timeout;
    state Optional<UID> debugID;
    state bool send_batch;

    state TransactionTagMap<uint32_t> tags;

    // dynamic batching
    state PromiseStream<double> replyTimes;
    state PromiseStream<Error> _errorStream;
    state double batchTime = 0;
    state Span span("NAPI:readVersionBatcher"_loc);
    loop {
        send_batch = false;
        choose {
            when(DatabaseContext::VersionRequest req = waitNext(versionStream)) {
                if (req.debugID.present()) {
                    if (!debugID.present()) {
                        debugID = nondeterministicRandom()->randomUniqueID();
                    }
                    g_traceBatch.addAttach("TransactionAttachID", req.debugID.get().first(), debugID.get().first());
                }
                span.addParent(req.spanContext);
                requests.push_back(req.reply);
                for (auto tag : req.tags) {
                    ++tags[tag];
                }

                if (requests.size() == CLIENT_KNOBS->MAX_BATCH_SIZE)
                    send_batch = true;
                else if (!timeout.isValid())
                    timeout = delay(batchTime, TaskPriority::GetConsistentReadVersion);
            }
            when(wait(timeout.isValid() ? timeout : Never())) { send_batch = true; }
            // dynamic batching monitors reply latencies
            when(double reply_latency = waitNext(replyTimes.getFuture())) {
                double target_latency = reply_latency * 0.5;
                batchTime = min(0.1 * target_latency + 0.9 * batchTime, CLIENT_KNOBS->GRV_BATCH_TIMEOUT);
            }
            when(wait(collection)) {} // for errors
        }
        if (send_batch) {
            int count = requests.size();
            ASSERT(count);
            // dynamic batching
            Promise<GetReadVersionReply> GRVReply;
            requests.push_back(GRVReply);
            addActor.send(ready(timeReply(GRVReply.getFuture(), replyTimes)));

            Future<Void> batch = incrementalBroadcastWithError(
                getConsistentReadVersion(span.context, cx, count, priority, flags, std::move(tags), std::move(debugID)),
                std::move(requests),
                CLIENT_KNOBS->BROADCAST_BATCH_SIZE);

            span = Span("NAPI:readVersionBatcher"_loc);
            tags.clear();
            debugID = Optional<UID>();
            requests.clear();
            addActor.send(batch);
            timeout = Future<Void>();
        }
    }
}

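// Unpacks one transaction's share of a batched GRV reply: samples latency,
// enforces database locks (unless lock aware), applies client-side tag throttles,
// and refreshes the metadata version cache before returning the version.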
ACTOR Future<Version> extractReadVersion(Location location,
                                         SpanID spanContext,
                                         SpanID parent,
                                         DatabaseContext* cx,
                                         TransactionPriority priority,
                                         Reference<TransactionLogInfo> trLogInfo,
                                         Future<GetReadVersionReply> f,
                                         bool lockAware,
                                         double startTime,
                                         Promise<Optional<Value>> metadataVersion,
                                         TagSet tags) {
    state Span span(spanContext, location, { parent });
    GetReadVersionReply rep = wait(f);
    double latency = now() - startTime;
    cx->GRVLatencies.addSample(latency);
    if (trLogInfo)
        trLogInfo->addLog(FdbClientLogEvents::EventGetVersion_V3(
            startTime, cx->clientLocality.dcId(), latency, priority, rep.version));
    if (rep.version == 1 && rep.locked) {
        throw proxy_memory_limit_exceeded();
    }
    if (rep.locked && !lockAware)
        throw database_locked();

    ++cx->transactionReadVersionsCompleted;
    switch (priority) {
    case TransactionPriority::IMMEDIATE:
        ++cx->transactionImmediateReadVersionsCompleted;
        break;
    case TransactionPriority::DEFAULT:
        ++cx->transactionDefaultReadVersionsCompleted;
        break;
    case TransactionPriority::BATCH:
        ++cx->transactionBatchReadVersionsCompleted;
        break;
    default:
        ASSERT(false);
    }

    if (tags.size() != 0) {
        auto& priorityThrottledTags = cx->throttledTags[priority];
        for (auto& tag : tags) {
            auto itr = priorityThrottledTags.find(tag);
            if (itr != priorityThrottledTags.end()) {
                if (itr->second.expired()) {
                    priorityThrottledTags.erase(itr);
                } else if (itr->second.throttleDuration() > 0) {
                    TEST(true); // throttling transaction after getting read version
                    ++cx->transactionReadVersionsThrottled;
                    throw tag_throttled();
                }
            }
        }

        for (auto& tag : tags) {
            auto itr = priorityThrottledTags.find(tag);
            if (itr != priorityThrottledTags.end()) {
                itr->second.addReleased(1);
            }
        }
    }

    if (rep.version > cx->metadataVersionCache[cx->mvCacheInsertLocation].first) {
        cx->mvCacheInsertLocation = (cx->mvCacheInsertLocation + 1) % cx->metadataVersionCache.size();
        cx->metadataVersionCache[cx->mvCacheInsertLocation] = std::make_pair(rep.version, rep.metadataVersion);
    }

    metadataVersion.send(rep.metadataVersion);
    return rep.version;
}

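// Lazily starts a readVersionBatcher for this (priority, flags) combination and
// enqueues a VersionRequest on its stream. If a client-side tag throttle is
// active and not yet due for a recheck, fails fast with tag_throttled instead of
// contacting a proxy.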
Future<Version> Transaction::getReadVersion(uint32_t flags) {
    if (!readVersion.isValid()) {
        ++cx->transactionReadVersions;
        flags |= options.getReadVersionFlags;
        switch (options.priority) {
        case TransactionPriority::IMMEDIATE:
            flags |= GetReadVersionRequest::PRIORITY_SYSTEM_IMMEDIATE;
            ++cx->transactionImmediateReadVersions;
            break;
        case TransactionPriority::DEFAULT:
            flags |= GetReadVersionRequest::PRIORITY_DEFAULT;
            ++cx->transactionDefaultReadVersions;
            break;
        case TransactionPriority::BATCH:
            flags |= GetReadVersionRequest::PRIORITY_BATCH;
            ++cx->transactionBatchReadVersions;
            break;
        default:
            ASSERT(false);
        }

        if (options.tags.size() != 0) {
            double maxThrottleDelay = 0.0;
            bool canRecheck = false;

            auto& priorityThrottledTags = cx->throttledTags[options.priority];
            for (auto& tag : options.tags) {
                auto itr = priorityThrottledTags.find(tag);
                if (itr != priorityThrottledTags.end()) {
                    if (!itr->second.expired()) {
                        maxThrottleDelay = std::max(maxThrottleDelay, itr->second.throttleDuration());
                        canRecheck = itr->second.canRecheck();
                    } else {
                        priorityThrottledTags.erase(itr);
                    }
                }
            }

            if (maxThrottleDelay > 0.0 && !canRecheck) { // TODO: allow delaying?
                TEST(true); // Throttling tag before GRV request
                ++cx->transactionReadVersionsThrottled;
                readVersion = tag_throttled();
                return readVersion;
            } else {
                TEST(maxThrottleDelay > 0.0); // Rechecking throttle
            }

            for (auto& tag : options.tags) {
                auto itr = priorityThrottledTags.find(tag);
                if (itr != priorityThrottledTags.end()) {
                    itr->second.updateChecked();
                }
            }
        }

        auto& batcher = cx->versionBatcher[flags];
        if (!batcher.actor.isValid()) {
            batcher.actor = readVersionBatcher(cx.getPtr(), batcher.stream.getFuture(), options.priority, flags);
        }

        Location location = "NAPI:getReadVersion"_loc;
        UID spanContext = deterministicRandom()->randomUniqueID();
        auto const req = DatabaseContext::VersionRequest(spanContext, options.tags, info.debugID);
        batcher.stream.send(req);
        startTime = now();
        readVersion = extractReadVersion(location,
                                         spanContext,
                                         info.spanID,
                                         cx.getPtr(),
                                         options.priority,
                                         trLogInfo,
                                         req.reply.getFuture(),
                                         options.lockAware,
                                         startTime,
                                         metadataVersion,
                                         options.tags);
    }
    return readVersion;
}

Optional<Version> Transaction::getCachedReadVersion() {
    if (readVersion.isValid() && readVersion.isReady() && !readVersion.isError()) {
        return readVersion.get();
    } else {
        return Optional<Version>();
    }
}

Future<Standalone<StringRef>> Transaction::getVersionstamp() {
    // Must be requested before commit(); once a commit is in flight the request is rejected.
    if (committing.isValid()) {
        return transaction_invalid_version();
    }
    return versionstampPromise.getFuture();
}

// Gets the protocol version reported by a coordinator via the protocol info interface
ACTOR Future<ProtocolVersion> getCoordinatorProtocol(NetworkAddressList coordinatorAddresses) {
    RequestStream<ProtocolInfoRequest> requestStream{ Endpoint{ { coordinatorAddresses }, WLTOKEN_PROTOCOL_INFO } };
    ProtocolInfoReply reply = wait(retryBrokenPromise(requestStream, ProtocolInfoRequest{}));

    return reply.version;
}

// Gets the protocol version reported by a coordinator in its connect packet.
// If we are unable to get a version from the connect packet (e.g. because we lost connection with the peer), then
// this function will return with an unset result.
// If an expected version is given, this future won't return if the actual protocol version matches the expected
// version.
ACTOR Future<Optional<ProtocolVersion>> getCoordinatorProtocolFromConnectPacket(
    NetworkAddress coordinatorAddress,
    Optional<ProtocolVersion> expectedVersion) {

    state Reference<AsyncVar<Optional<ProtocolVersion>>> protocolVersion =
        FlowTransport::transport().getPeerProtocolAsyncVar(coordinatorAddress);

    loop {
        if (protocolVersion->get().present() && protocolVersion->get() != expectedVersion) {
            return protocolVersion->get();
        }

        Future<Void> change = protocolVersion->onChange();
        if (!protocolVersion->get().present()) {
            // If we still don't have any connection info after a timeout, retry sending the protocol version request
            change = timeout(change, FLOW_KNOBS->CONNECTION_MONITOR_TIMEOUT, Void());
        }

        wait(change);

        if (!protocolVersion->get().present()) {
            return protocolVersion->get();
        }
    }
}

// Returns the protocol version reported by the given coordinator.
// If an expected version is given, the future won't return until the protocol version is different than expected.
ACTOR Future<ProtocolVersion> getClusterProtocolImpl(
    Reference<AsyncVar<Optional<ClientLeaderRegInterface>>> coordinator,
    Optional<ProtocolVersion> expectedVersion) {

    state bool needToConnect = true;
    state Future<ProtocolVersion> protocolVersion = Never();

    loop {
        if (!coordinator->get().present()) {
            wait(coordinator->onChange());
        } else {
            Endpoint coordinatorEndpoint = coordinator->get().get().getLeader.getEndpoint();
            if (needToConnect) {
                // Even though we typically rely on the connect packet to get the protocol version, we need to send
                // some request in order to start a connection. This protocol version request serves that purpose.
                protocolVersion = getCoordinatorProtocol(coordinatorEndpoint.addresses);
                needToConnect = false;
            }
            choose {
                when(wait(coordinator->onChange())) { needToConnect = true; }

                when(ProtocolVersion pv = wait(protocolVersion)) {
                    if (!expectedVersion.present() || expectedVersion.get() != pv) {
                        return pv;
                    }

                    protocolVersion = Never();
                }

                // Older versions of FDB don't have an endpoint to return the protocol version, so we get this info
                // from the connect packet
                when(Optional<ProtocolVersion> pv = wait(getCoordinatorProtocolFromConnectPacket(
                         coordinatorEndpoint.getPrimaryAddress(), expectedVersion))) {
                    if (pv.present()) {
                        return pv.get();
                    } else {
                        needToConnect = true;
                    }
                }
            }
        }
    }
}

// Returns the protocol version reported by the coordinator this client is currently connected to.
// If an expected version is given, the future won't return until the protocol version is different than expected.
// Note: this will never return if the server is running a protocol from FDB 5.0 or older
Future<ProtocolVersion> DatabaseContext::getClusterProtocol(Optional<ProtocolVersion> expectedVersion) {
    return getClusterProtocolImpl(coordinator, expectedVersion);
}

uint32_t Transaction::getSize() {
    auto s = tr.transaction.mutations.expectedSize() + tr.transaction.read_conflict_ranges.expectedSize() +
             tr.transaction.write_conflict_ranges.expectedSize();
    return s;
}

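// Classifies an error as retryable (backoff + reset), retryable after a fresh
// read version, or fatal. Callers typically drive this from a retry loop; a
// minimal sketch (hypothetical client code, not part of this file):
//
//     state Transaction tr(cx);
//     loop {
//         try {
//             // ... reads and writes ...
//             wait(tr.commit());
//             break;
//         } catch (Error& e) {
//             // resets tr and delays with backoff, or rethrows if not retryable
//             wait(tr.onError(e));
//         }
//     }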
Future<Void> Transaction::onError(Error const& e) {
    if (e.code() == error_code_success) {
        return client_invalid_operation();
    }
    if (e.code() == error_code_not_committed || e.code() == error_code_commit_unknown_result ||
        e.code() == error_code_database_locked || e.code() == error_code_proxy_memory_limit_exceeded ||
        e.code() == error_code_process_behind || e.code() == error_code_batch_transaction_throttled ||
        e.code() == error_code_tag_throttled) {
        if (e.code() == error_code_not_committed)
            ++cx->transactionsNotCommitted;
        else if (e.code() == error_code_commit_unknown_result)
            ++cx->transactionsMaybeCommitted;
        else if (e.code() == error_code_proxy_memory_limit_exceeded)
            ++cx->transactionsResourceConstrained;
        else if (e.code() == error_code_process_behind)
            ++cx->transactionsProcessBehind;
        else if (e.code() == error_code_batch_transaction_throttled || e.code() == error_code_tag_throttled) {
            ++cx->transactionsThrottled;
        }

        double backoff = getBackoff(e.code());
        reset();
        return delay(backoff, info.taskID);
    }
    if (e.code() == error_code_transaction_too_old || e.code() == error_code_future_version) {
        if (e.code() == error_code_transaction_too_old)
            ++cx->transactionsTooOld;
        else if (e.code() == error_code_future_version)
            ++cx->transactionsFutureVersions;

        double maxBackoff = options.maxBackoff;
        reset();
        return delay(std::min(CLIENT_KNOBS->FUTURE_VERSION_RETRY_DELAY, maxBackoff), info.taskID);
    }

    if (g_network->isSimulated() && ++numErrors % 10 == 0)
        TraceEvent(SevWarnAlways, "TransactionTooManyRetries").detail("NumRetries", numErrors);

    return e;
}

ACTOR Future<StorageMetrics> getStorageMetricsLargeKeyRange(Database cx, KeyRange keys);

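// Fetches current metrics for a range from the storage servers that own it. If
// the shard has moved (wrong_shard_server / all_alternatives_failed), invalidates
// the location cache and falls back to the multi-shard path below.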
ACTOR Future<StorageMetrics> doGetStorageMetrics(Database cx, KeyRange keys, Reference<LocationInfo> locationInfo) {
    loop {
        try {
            WaitMetricsRequest req(keys, StorageMetrics(), StorageMetrics());
            req.min.bytes = 0;
            req.max.bytes = -1;
            StorageMetrics m = wait(loadBalance(
                locationInfo->locations(), &StorageServerInterface::waitMetrics, req, TaskPriority::DataDistribution));
            return m;
        } catch (Error& e) {
            if (e.code() != error_code_wrong_shard_server && e.code() != error_code_all_alternatives_failed) {
                TraceEvent(SevError, "WaitStorageMetricsError").error(e);
                throw;
            }
            wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, TaskPriority::DataDistribution));
            cx->invalidateCache(keys);
            StorageMetrics m = wait(getStorageMetricsLargeKeyRange(cx, keys));
            return m;
        }
    }
}

ACTOR Future<StorageMetrics> getStorageMetricsLargeKeyRange(Database cx, KeyRange keys) {
    state Span span("NAPI:GetStorageMetricsLargeKeyRange"_loc);
    vector<pair<KeyRange, Reference<LocationInfo>>> locations =
        wait(getKeyRangeLocations(cx,
                                  keys,
                                  std::numeric_limits<int>::max(),
                                  false,
                                  &StorageServerInterface::waitMetrics,
                                  TransactionInfo(TaskPriority::DataDistribution, span.context)));
    state int nLocs = locations.size();
    state vector<Future<StorageMetrics>> fx(nLocs);
    state StorageMetrics total;
    KeyRef partBegin, partEnd;
    for (int i = 0; i < nLocs; i++) {
        partBegin = (i == 0) ? keys.begin : locations[i].first.begin;
        partEnd = (i == nLocs - 1) ? keys.end : locations[i].first.end;
        fx[i] = doGetStorageMetrics(cx, KeyRangeRef(partBegin, partEnd), locations[i].second);
    }
    wait(waitForAll(fx));
    for (int i = 0; i < nLocs; i++) {
        total += fx[i].get();
    }
    return total;
}

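// Long-polling helper: waitMetrics only replies once the shard's metrics leave
// the window [x - halfError, x + halfError], so each reply is a meaningful
// change; the delta against the previous value is pushed to deltaStream.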
ACTOR Future<Void> trackBoundedStorageMetrics(KeyRange keys,
                                              Reference<LocationInfo> location,
                                              StorageMetrics x,
                                              StorageMetrics halfError,
                                              PromiseStream<StorageMetrics> deltaStream) {
    try {
        loop {
            WaitMetricsRequest req(keys, x - halfError, x + halfError);
            StorageMetrics nextX = wait(loadBalance(location->locations(), &StorageServerInterface::waitMetrics, req));
            deltaStream.send(nextX - x);
            x = nextX;
        }
    } catch (Error& e) {
        deltaStream.sendError(e);
        throw e;
    }
}

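// Splits the permitted error budget across locations: each per-location tracker
// gets halfErrorPerMachine = permittedError * 0.5 / nLocs, so (roughly) the sum
// of nLocs estimates stays within permittedError / 2 of the true total, matching
// the invariant below. The min/max bounds are widened by (nLocs - 1) *
// halfErrorPerMachine so we only return once the total is out of bounds even
// after accounting for per-location tracking error.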
ACTOR Future<StorageMetrics> waitStorageMetricsMultipleLocations(
    vector<pair<KeyRange, Reference<LocationInfo>>> locations,
    StorageMetrics min,
    StorageMetrics max,
    StorageMetrics permittedError) {
    state int nLocs = locations.size();
    state vector<Future<StorageMetrics>> fx(nLocs);
    state StorageMetrics total;
    state PromiseStream<StorageMetrics> deltas;
    state vector<Future<Void>> wx(fx.size());
    state StorageMetrics halfErrorPerMachine = permittedError * (0.5 / nLocs);
    state StorageMetrics maxPlus = max + halfErrorPerMachine * (nLocs - 1);
    state StorageMetrics minMinus = min - halfErrorPerMachine * (nLocs - 1);

    for (int i = 0; i < nLocs; i++) {
        WaitMetricsRequest req(locations[i].first, StorageMetrics(), StorageMetrics());
        req.min.bytes = 0;
        req.max.bytes = -1;
        fx[i] = loadBalance(locations[i].second->locations(),
                            &StorageServerInterface::waitMetrics,
                            req,
                            TaskPriority::DataDistribution);
    }
    wait(waitForAll(fx));

    // invariant: true total is between (total-permittedError/2, total+permittedError/2)
    for (int i = 0; i < nLocs; i++)
        total += fx[i].get();

    if (!total.allLessOrEqual(maxPlus))
        return total;
    if (!minMinus.allLessOrEqual(total))
        return total;

    for (int i = 0; i < nLocs; i++)
        wx[i] = trackBoundedStorageMetrics(
            locations[i].first, locations[i].second, fx[i].get(), halfErrorPerMachine, deltas);

    loop {
        StorageMetrics delta = waitNext(deltas.getFuture());
        total += delta;
        if (!total.allLessOrEqual(maxPlus))
            return total;
        if (!minMinus.allLessOrEqual(total))
            return total;
    }
}

2021-03-11 02:06:03 +08:00
|
|
|
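// Unwraps the metrics half of a waitStorageMetrics() result; callers use this when they have not
// asked for a shard-count check, so the Optional is expected to be present.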
ACTOR Future<StorageMetrics> extractMetrics(Future<std::pair<Optional<StorageMetrics>, int>> fMetrics) {
	std::pair<Optional<StorageMetrics>, int> x = wait(fMetrics);
	return x.first.get();
}

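// Returns the read-hot sub-ranges within keys by asking every storage server that owns part of
// the range and concatenating the replies. On wrong_shard_server or all_alternatives_failed the
// location cache is invalidated and the whole request is retried.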
ACTOR Future<Standalone<VectorRef<ReadHotRangeWithMetrics>>> getReadHotRanges(Database cx, KeyRange keys) {
	state Span span("NAPI:GetReadHotRanges"_loc);
	loop {
		int64_t shardLimit = 100; // Shard limit here does not really matter since this function is currently only used
		                          // to find the read-hot sub ranges within a read-hot shard.
		vector<pair<KeyRange, Reference<LocationInfo>>> locations =
		    wait(getKeyRangeLocations(cx,
		                              keys,
		                              shardLimit,
		                              false,
		                              &StorageServerInterface::getReadHotRanges,
		                              TransactionInfo(TaskPriority::DataDistribution, span.context)));
		try {
			// TODO: how to handle this?
			// This function is called whenever a shard becomes read-hot. But the shard may have been split across
			// more than one storage server after becoming read-hot and before this function is called, i.e. a race
			// condition. Should we abort and wait for the newly split shards to become hot again?
			state int nLocs = locations.size();
			// if (nLocs > 1) {
			//	TraceEvent("RHDDebug")
			//	    .detail("NumSSIs", nLocs)
			//	    .detail("KeysBegin", keys.begin.printable().c_str())
			//	    .detail("KeysEnd", keys.end.printable().c_str());
			// }
			state vector<Future<ReadHotSubRangeReply>> fReplies(nLocs);
			KeyRef partBegin, partEnd;
			for (int i = 0; i < nLocs; i++) {
				partBegin = (i == 0) ? keys.begin : locations[i].first.begin;
				partEnd = (i == nLocs - 1) ? keys.end : locations[i].first.end;
				ReadHotSubRangeRequest req(KeyRangeRef(partBegin, partEnd));
				fReplies[i] = loadBalance(locations[i].second->locations(),
				                          &StorageServerInterface::getReadHotRanges,
				                          req,
				                          TaskPriority::DataDistribution);
			}

			wait(waitForAll(fReplies));

			if (nLocs == 1) {
				TEST(true); // Single-shard read hot range request
				return fReplies[0].get().readHotRanges;
			} else {
				TEST(true); // Multi-shard read hot range request
				Standalone<VectorRef<ReadHotRangeWithMetrics>> results;
				for (int i = 0; i < nLocs; i++) {
					results.append(results.arena(),
					               fReplies[i].get().readHotRanges.begin(),
					               fReplies[i].get().readHotRanges.size());
					results.arena().dependsOn(fReplies[i].get().readHotRanges.arena());
				}

				return results;
			}
		} catch (Error& e) {
			if (e.code() != error_code_wrong_shard_server && e.code() != error_code_all_alternatives_failed) {
				TraceEvent(SevError, "GetReadHotSubRangesError").error(e);
				throw;
			}
			cx->invalidateCache(keys);
			wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, TaskPriority::DataDistribution));
		}
	}
}

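// Waits until the metrics of keys move outside [min, max] (to within permittedError) and returns
// them paired with -1. If expectedShardCount is non-negative and the range does not currently
// span exactly that many shards, returns an empty Optional paired with the actual shard count
// instead. Ranges with shardLimit or more shards are penalized with a jittered delay and retried.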
ACTOR Future<std::pair<Optional<StorageMetrics>, int>> waitStorageMetrics(Database cx,
                                                                          KeyRange keys,
                                                                          StorageMetrics min,
                                                                          StorageMetrics max,
                                                                          StorageMetrics permittedError,
                                                                          int shardLimit,
                                                                          int expectedShardCount) {
	state Span span("NAPI:WaitStorageMetrics"_loc);
	loop {
		vector<pair<KeyRange, Reference<LocationInfo>>> locations =
		    wait(getKeyRangeLocations(cx,
		                              keys,
		                              shardLimit,
		                              false,
		                              &StorageServerInterface::waitMetrics,
		                              TransactionInfo(TaskPriority::DataDistribution, span.context)));
		if (expectedShardCount >= 0 && locations.size() != expectedShardCount) {
			return std::make_pair(Optional<StorageMetrics>(), locations.size());
		}

		// SOMEDAY: Right now, if there are too many shards we delay and check again later. There may be a better
		// solution to this.
		if (locations.size() < shardLimit) {
			try {
				Future<StorageMetrics> fx;
				if (locations.size() > 1) {
					fx = waitStorageMetricsMultipleLocations(locations, min, max, permittedError);
				} else {
					WaitMetricsRequest req(keys, min, max);
					fx = loadBalance(locations[0].second->locations(),
					                 &StorageServerInterface::waitMetrics,
					                 req,
					                 TaskPriority::DataDistribution);
				}
				StorageMetrics x = wait(fx);
				return std::make_pair(x, -1);
			} catch (Error& e) {
				if (e.code() != error_code_wrong_shard_server && e.code() != error_code_all_alternatives_failed) {
					TraceEvent(SevError, "WaitStorageMetricsError").error(e);
					throw;
				}
				cx->invalidateCache(keys);
				wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, TaskPriority::DataDistribution));
			}
		} else {
			TraceEvent(SevWarn, "WaitStorageMetricsPenalty")
			    .detail("Keys", keys)
			    .detail("Limit", CLIENT_KNOBS->STORAGE_METRICS_SHARD_LIMIT)
			    .detail("JitteredSecondsOfPenitence", CLIENT_KNOBS->STORAGE_METRICS_TOO_MANY_SHARDS_DELAY);
			wait(delayJittered(CLIENT_KNOBS->STORAGE_METRICS_TOO_MANY_SHARDS_DELAY, TaskPriority::DataDistribution));
			// make sure that the next getKeyRangeLocations() call will actually re-fetch the range
			cx->invalidateCache(keys);
		}
	}
}

Future<std::pair<Optional<StorageMetrics>, int>> Transaction::waitStorageMetrics(KeyRange const& keys,
                                                                                 StorageMetrics const& min,
                                                                                 StorageMetrics const& max,
                                                                                 StorageMetrics const& permittedError,
                                                                                 int shardLimit,
                                                                                 int expectedShardCount) {
	return ::waitStorageMetrics(cx, keys, min, max, permittedError, shardLimit, expectedShardCount);
}

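// Returns the current storage metrics for keys. With a positive shardLimit this goes through the
// waitMetrics interface with an effectively unbounded max so the reply is immediate; otherwise it
// falls back to getStorageMetricsLargeKeyRange(), which queries every shard individually and is
// not subject to the shard limit.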
Future<StorageMetrics> Transaction::getStorageMetrics(KeyRange const& keys, int shardLimit) {
	if (shardLimit > 0) {
		StorageMetrics m;
		m.bytes = -1;
		return extractMetrics(::waitStorageMetrics(cx, keys, StorageMetrics(), m, StorageMetrics(), shardLimit, -1));
	} else {
		return ::getStorageMetricsLargeKeyRange(cx, keys);
	}
}

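// Fetches data-distribution metrics for up to shardLimit shards of keys from a commit proxy,
// restarting the request whenever the proxy set changes.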
ACTOR Future<Standalone<VectorRef<DDMetricsRef>>> waitDataDistributionMetricsList(Database cx,
                                                                                  KeyRange keys,
                                                                                  int shardLimit) {
	loop {
		choose {
			when(wait(cx->onProxiesChanged())) {}
			when(ErrorOr<GetDDMetricsReply> rep =
			         wait(errorOr(basicLoadBalance(cx->getCommitProxies(false),
			                                       &CommitProxyInterface::getDDMetrics,
			                                       GetDDMetricsRequest(keys, shardLimit))))) {
				if (rep.isError()) {
					throw rep.getError();
				}
				return rep.get().storageMetricsList;
			}
		}
	}
}

Future<Standalone<VectorRef<ReadHotRangeWithMetrics>>> Transaction::getReadHotRanges(KeyRange const& keys) {
	return ::getReadHotRanges(cx, keys);
}

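// Computes split points that divide keys into chunks of roughly chunkSize bytes by querying each
// owning storage server, then stitches the replies together with the shard boundaries. The
// result always begins with keys.begin and ends with keys.end.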
ACTOR Future<Standalone<VectorRef<KeyRef>>> getRangeSplitPoints(Database cx, KeyRange keys, int64_t chunkSize) {
	state Span span("NAPI:GetRangeSplitPoints"_loc);
	loop {
		state vector<pair<KeyRange, Reference<LocationInfo>>> locations =
		    wait(getKeyRangeLocations(cx,
		                              keys,
		                              100,
		                              false,
		                              &StorageServerInterface::getRangeSplitPoints,
		                              TransactionInfo(TaskPriority::DataDistribution, span.context)));
		try {
			state int nLocs = locations.size();
			state vector<Future<SplitRangeReply>> fReplies(nLocs);
			KeyRef partBegin, partEnd;
			for (int i = 0; i < nLocs; i++) {
				partBegin = (i == 0) ? keys.begin : locations[i].first.begin;
				partEnd = (i == nLocs - 1) ? keys.end : locations[i].first.end;
				SplitRangeRequest req(KeyRangeRef(partBegin, partEnd), chunkSize);
				fReplies[i] = loadBalance(locations[i].second->locations(),
				                          &StorageServerInterface::getRangeSplitPoints,
				                          req,
				                          TaskPriority::DataDistribution);
			}

			wait(waitForAll(fReplies));
			Standalone<VectorRef<KeyRef>> results;

			results.push_back_deep(results.arena(), keys.begin);
			for (int i = 0; i < nLocs; i++) {
				if (i > 0) {
					results.push_back_deep(results.arena(), locations[i].first.begin); // Need this shard boundary
				}
				if (fReplies[i].get().splitPoints.size() > 0) {
					results.append(
					    results.arena(), fReplies[i].get().splitPoints.begin(), fReplies[i].get().splitPoints.size());
					results.arena().dependsOn(fReplies[i].get().splitPoints.arena());
				}
			}
			if (results.back() != keys.end) {
				results.push_back_deep(results.arena(), keys.end);
			}

			return results;
		} catch (Error& e) {
			if (e.code() != error_code_wrong_shard_server && e.code() != error_code_all_alternatives_failed) {
				TraceEvent(SevError, "GetRangeSplitPoints").error(e);
				throw;
			}
			cx->invalidateCache(keys);
			wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, TaskPriority::DataDistribution));
		}
	}
}

Future<Standalone<VectorRef<KeyRef>>> Transaction::getRangeSplitPoints(KeyRange const& keys, int64_t chunkSize) {
	return ::getRangeSplitPoints(cx, keys, chunkSize);
}

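// Computes split points such that the metrics of each resulting piece stay within limit, walking
// the shards in order and carrying the metrics consumed so far into each request. A final split
// point that would leave an unfairly small last piece (per STORAGE_METRICS_UNFAIR_SPLIT_LIMIT)
// is dropped.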
ACTOR Future<Standalone<VectorRef<KeyRef>>> splitStorageMetrics(Database cx,
                                                                KeyRange keys,
                                                                StorageMetrics limit,
                                                                StorageMetrics estimated) {
	state Span span("NAPI:SplitStorageMetrics"_loc);
	loop {
		state vector<pair<KeyRange, Reference<LocationInfo>>> locations =
		    wait(getKeyRangeLocations(cx,
		                              keys,
		                              CLIENT_KNOBS->STORAGE_METRICS_SHARD_LIMIT,
		                              false,
		                              &StorageServerInterface::splitMetrics,
		                              TransactionInfo(TaskPriority::DataDistribution, span.context)));
		state StorageMetrics used;
		state Standalone<VectorRef<KeyRef>> results;

		// SOMEDAY: Right now, if there are too many shards we delay and check again later. There may be a better
		// solution to this.
		if (locations.size() == CLIENT_KNOBS->STORAGE_METRICS_SHARD_LIMIT) {
			wait(delay(CLIENT_KNOBS->STORAGE_METRICS_TOO_MANY_SHARDS_DELAY, TaskPriority::DataDistribution));
			cx->invalidateCache(keys);
		} else {
			results.push_back_deep(results.arena(), keys.begin);
			try {
				//TraceEvent("SplitStorageMetrics").detail("Locations", locations.size());

				state int i = 0;
				for (; i < locations.size(); i++) {
					SplitMetricsRequest req(locations[i].first, limit, used, estimated, i == locations.size() - 1);
					SplitMetricsReply res = wait(loadBalance(locations[i].second->locations(),
					                                         &StorageServerInterface::splitMetrics,
					                                         req,
					                                         TaskPriority::DataDistribution));
					if (res.splits.size() &&
					    res.splits[0] <= results.back()) { // split points are out of order, possibly because of moving
						                                   // data, throw error to retry
						ASSERT_WE_THINK(
						    false); // FIXME: This seems impossible and doesn't seem to be covered by testing
						throw all_alternatives_failed();
					}
					if (res.splits.size()) {
						results.append(results.arena(), res.splits.begin(), res.splits.size());
						results.arena().dependsOn(res.splits.arena());
					}
					used = res.used;

					//TraceEvent("SplitStorageMetricsResult").detail("Used", used.bytes).detail("Location", i).detail("Size", res.splits.size());
				}

				if (used.allLessOrEqual(limit * CLIENT_KNOBS->STORAGE_METRICS_UNFAIR_SPLIT_LIMIT)) {
					results.resize(results.arena(), results.size() - 1);
				}

				results.push_back_deep(results.arena(), keys.end);
				return results;
			} catch (Error& e) {
				if (e.code() != error_code_wrong_shard_server && e.code() != error_code_all_alternatives_failed) {
					TraceEvent(SevError, "SplitStorageMetricsError").error(e);
					throw;
				}
				cx->invalidateCache(keys);
				wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, TaskPriority::DataDistribution));
			}
		}
	}
}

Future<Standalone<VectorRef<KeyRef>>> Transaction::splitStorageMetrics(KeyRange const& keys,
                                                                       StorageMetrics const& limit,
                                                                       StorageMetrics const& estimated) {
	return ::splitStorageMetrics(cx, keys, limit, estimated);
}

void Transaction::checkDeferredError() {
	cx->checkDeferredError();
}

Reference<TransactionLogInfo> Transaction::createTrLogInfoProbabilistically(const Database& cx) {
	if (!cx->isError()) {
		double clientSamplingProbability = GlobalConfig::globalConfig().get<double>(
		    fdbClientInfoTxnSampleRate, CLIENT_KNOBS->CSI_SAMPLING_PROBABILITY);
		if (((networkOptions.logClientInfo.present() && networkOptions.logClientInfo.get()) || BUGGIFY) &&
		    deterministicRandom()->random01() < clientSamplingProbability &&
		    (!g_network->isSimulated() || !g_simulator.speedUpSimulation)) {
			return makeReference<TransactionLogInfo>(TransactionLogInfo::DATABASE);
		}
	}

	return Reference<TransactionLogInfo>();
}

void Transaction::setTransactionID(uint64_t id) {
	ASSERT(getSize() == 0);
	info.spanID = SpanID(id, info.spanID.second());
}

void Transaction::setToken(uint64_t token) {
	ASSERT(getSize() == 0);
	info.spanID = SpanID(info.spanID.first(), token);
}

void enableClientInfoLogging() {
	ASSERT(networkOptions.logClientInfo.present() == false);
	networkOptions.logClientInfo = true;
	TraceEvent(SevInfo, "ClientInfoLoggingEnabled");
}

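// Triggers a cluster snapshot identified by snapUID by sending snapCmd to a commit proxy
// (at most once), restarting proxy selection whenever the proxy set changes.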
ACTOR Future<Void> snapCreate(Database cx, Standalone<StringRef> snapCmd, UID snapUID) {
	TraceEvent("SnapCreateEnter").detail("SnapCmd", snapCmd.toString()).detail("UID", snapUID);
	try {
		loop {
			choose {
				when(wait(cx->onProxiesChanged())) {}
				when(wait(basicLoadBalance(cx->getCommitProxies(false),
				                           &CommitProxyInterface::proxySnapReq,
				                           ProxySnapRequest(snapCmd, snapUID, snapUID),
				                           cx->taskID,
				                           true /*atmostOnce*/))) {
					TraceEvent("SnapCreateExit").detail("SnapCmd", snapCmd.toString()).detail("UID", snapUID);
					return Void();
				}
			}
		}
	} catch (Error& e) {
		TraceEvent("SnapCreateError").detail("SnapCmd", snapCmd.toString()).detail("UID", snapUID).error(e);
		throw;
	}
}

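// Checks whether excluding the given addresses is safe. The data distributor must report the
// exclusion safe, and the number of coordinators being excluded must not exceed the coordinator
// fault tolerance, counting coordinators that did not respond as already unavailable.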
ACTOR Future<bool> checkSafeExclusions(Database cx, vector<AddressExclusion> exclusions) {
	TraceEvent("ExclusionSafetyCheckBegin")
	    .detail("NumExclusion", exclusions.size())
	    .detail("Exclusions", describe(exclusions));
	state ExclusionSafetyCheckRequest req(exclusions);
	state bool ddCheck;
	try {
		loop {
			choose {
				when(wait(cx->onProxiesChanged())) {}
				when(ExclusionSafetyCheckReply _ddCheck =
				         wait(basicLoadBalance(cx->getCommitProxies(false),
				                               &CommitProxyInterface::exclusionSafetyCheckReq,
				                               req,
				                               cx->taskID))) {
					ddCheck = _ddCheck.safe;
					break;
				}
			}
		}
	} catch (Error& e) {
		if (e.code() != error_code_actor_cancelled) {
			TraceEvent("ExclusionSafetyCheckError")
			    .detail("NumExclusion", exclusions.size())
			    .detail("Exclusions", describe(exclusions))
			    .error(e);
		}
		throw;
	}
	TraceEvent("ExclusionSafetyCheckCoordinators");
	state ClientCoordinators coordinatorList(cx->getConnectionFile());
	state vector<Future<Optional<LeaderInfo>>> leaderServers;
	leaderServers.reserve(coordinatorList.clientLeaderServers.size());
	for (int i = 0; i < coordinatorList.clientLeaderServers.size(); i++) {
		leaderServers.push_back(retryBrokenPromise(coordinatorList.clientLeaderServers[i].getLeader,
		                                           GetLeaderRequest(coordinatorList.clusterKey, UID()),
		                                           TaskPriority::CoordinationReply));
	}
	// Wait for quorum so we don't dismiss live coordinators as unreachable by acting too fast
	choose {
		when(wait(smartQuorum(leaderServers, leaderServers.size() / 2 + 1, 1.0))) {}
		when(wait(delay(3.0))) {
			TraceEvent("ExclusionSafetyCheckNoCoordinatorQuorum");
			return false;
		}
	}
	int attemptCoordinatorExclude = 0;
	int coordinatorsUnavailable = 0;
	for (int i = 0; i < leaderServers.size(); i++) {
		NetworkAddress leaderAddress =
		    coordinatorList.clientLeaderServers[i].getLeader.getEndpoint().getPrimaryAddress();
		if (leaderServers[i].isReady()) {
			if ((std::count(
			         exclusions.begin(), exclusions.end(), AddressExclusion(leaderAddress.ip, leaderAddress.port)) ||
			     std::count(exclusions.begin(), exclusions.end(), AddressExclusion(leaderAddress.ip)))) {
				attemptCoordinatorExclude++;
			}
		} else {
			coordinatorsUnavailable++;
		}
	}
	int faultTolerance = (leaderServers.size() - 1) / 2 - coordinatorsUnavailable;
	bool coordinatorCheck = (attemptCoordinatorExclude <= faultTolerance);
	TraceEvent("ExclusionSafetyCheckFinish")
	    .detail("CoordinatorListSize", leaderServers.size())
	    .detail("NumExclusions", exclusions.size())
	    .detail("FaultTolerance", faultTolerance)
	    .detail("AttemptCoordinatorExclude", attemptCoordinatorExclude)
	    .detail("CoordinatorCheck", coordinatorCheck)
	    .detail("DataDistributorCheck", ddCheck);

	return (ddCheck && coordinatorCheck);
}

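// Contacts the worker described by kv and, if it responds within CLI_CONNECT_TIMEOUT, records it
// in address_interface under its primary and (if present) secondary address, with any ":tls"
// suffix stripped. connectLock bounds the number of simultaneous connection attempts.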
ACTOR Future<Void> addInterfaceActor(std::map<Key, std::pair<Value, ClientLeaderRegInterface>>* address_interface,
                                     Reference<FlowLock> connectLock,
                                     KeyValue kv) {
	wait(connectLock->take());
	state FlowLock::Releaser releaser(*connectLock);
	state ClientWorkerInterface workerInterf =
	    BinaryReader::fromStringRef<ClientWorkerInterface>(kv.value, IncludeVersion());
	state ClientLeaderRegInterface leaderInterf(workerInterf.address());
	choose {
		when(Optional<LeaderInfo> rep =
		         wait(brokenPromiseToNever(leaderInterf.getLeader.getReply(GetLeaderRequest())))) {
			StringRef ip_port =
			    kv.key.endsWith(LiteralStringRef(":tls")) ? kv.key.removeSuffix(LiteralStringRef(":tls")) : kv.key;
			(*address_interface)[ip_port] = std::make_pair(kv.value, leaderInterf);

			if (workerInterf.reboot.getEndpoint().addresses.secondaryAddress.present()) {
				Key full_ip_port2 =
				    StringRef(workerInterf.reboot.getEndpoint().addresses.secondaryAddress.get().toString());
				StringRef ip_port2 = full_ip_port2.endsWith(LiteralStringRef(":tls"))
				                         ? full_ip_port2.removeSuffix(LiteralStringRef(":tls"))
				                         : full_ip_port2;
				(*address_interface)[ip_port2] = std::make_pair(kv.value, leaderInterf);
			}
		}
		when(wait(delay(CLIENT_KNOBS->CLI_CONNECT_TIMEOUT))) {} // NOTE: change the timeout here if necessary
	}
	return Void();
}

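// Sends a RebootRequest to the worker at addr, forwarding the check and duration arguments.
// Returns 1 if the request was sent and 0 if there is no connection file or no worker with that
// address responded in time.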
ACTOR static Future<int64_t> rebootWorkerActor(DatabaseContext* cx, ValueRef addr, bool check, int duration) {
	// ignore negative value
	if (duration < 0)
		duration = 0;
	// fetch the addresses of all workers
	state std::map<Key, std::pair<Value, ClientLeaderRegInterface>> address_interface;
	if (!cx->getConnectionFile())
		return 0;
	RangeResult kvs = wait(getWorkerInterfaces(cx->getConnectionFile()));
	ASSERT(!kvs.more);
	// Note: reuse this knob from fdbcli, change it if necessary
	Reference<FlowLock> connectLock(new FlowLock(CLIENT_KNOBS->CLI_CONNECT_PARALLELISM));
	std::vector<Future<Void>> addInterfs;
	for (const auto& it : kvs) {
		addInterfs.push_back(addInterfaceActor(&address_interface, connectLock, it));
	}
	wait(waitForAll(addInterfs));
	if (!address_interface.count(addr))
		return 0;

	BinaryReader::fromStringRef<ClientWorkerInterface>(address_interface[addr].first, IncludeVersion())
	    .reboot.send(RebootRequest(false, check, duration));
	return 1;
}

Future<int64_t> DatabaseContext::rebootWorker(StringRef addr, bool check, int duration) {
	return rebootWorkerActor(this, addr, check, duration);
}

Future<Void> DatabaseContext::forceRecoveryWithDataLoss(StringRef dcId) {
	return forceRecovery(getConnectionFile(), dcId);
}

ACTOR static Future<Void> createSnapshotActor(DatabaseContext* cx, UID snapUID, StringRef snapCmd) {
	wait(mgmtSnapCreate(cx->clone(), snapCmd, snapUID));
	return Void();
}

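// Validates that uid is a 32-character hex string before starting the snapshot; anything else is
// rejected with snap_invalid_uid_string.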
Future<Void> DatabaseContext::createSnapshot(StringRef uid, StringRef snapshot_command) {
	std::string uid_str = uid.toString();
	if (!std::all_of(uid_str.begin(), uid_str.end(), [](unsigned char c) { return std::isxdigit(c); }) ||
	    uid_str.size() != 32) {
		// only a 32-character hex string is considered a valid UID
		throw snap_invalid_uid_string();
	}
	return createSnapshotActor(this, UID::fromString(uid_str), snapshot_command);
}