Merge branch 'release-6.2' into dd-use-available-space
# Conflicts:
#	fdbserver/DataDistribution.actor.cpp
#	fdbserver/DataDistribution.actor.h
#	fdbserver/DataDistributionQueue.actor.cpp
commit e1fb568fd1
@@ -54,7 +54,8 @@ type RangeOptions struct {
 	// Reverse indicates that the read should be performed in lexicographic
 	// (false) or reverse lexicographic (true) order. When Reverse is true and
 	// Limit is non-zero, the last Limit key-value pairs in the range are
-	// returned.
+	// returned. Reading ranges in reverse is supported natively by the
+	// database and should have minimal extra cost.
 	Reverse bool
 }

@@ -184,7 +184,9 @@ public interface ReadTransaction extends ReadTransactionContext {
 	 * <i>first</i> keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query
 	 * should not limit the number of results. If {@code reverse} is {@code true} rows
 	 * will be limited starting at the end of the range.
-	 * @param reverse return results starting at the end of the range in reverse order
+	 * @param reverse return results starting at the end of the range in reverse order.
+	 *  Reading ranges in reverse is supported natively by the database and should
+	 *  have minimal extra cost.
 	 *
 	 * @return a handle to access the results of the asynchronous call
 	 */
@@ -205,7 +207,9 @@ public interface ReadTransaction extends ReadTransactionContext {
 	 * <i>first</i> keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query
 	 * should not limit the number of results. If {@code reverse} is {@code true} rows
 	 * will be limited starting at the end of the range.
-	 * @param reverse return results starting at the end of the range in reverse order
+	 * @param reverse return results starting at the end of the range in reverse order.
+	 *  Reading ranges in reverse is supported natively by the database and should
+	 *  have minimal extra cost.
 	 * @param mode provide a hint about how the results are to be used. This
 	 * can provide speed improvements or efficiency gains based on the caller's
 	 * knowledge of the upcoming access pattern.
@@ -272,7 +276,9 @@ public interface ReadTransaction extends ReadTransactionContext {
 	 * <i>first</i> keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query
 	 * should not limit the number of results. If {@code reverse} is {@code true} rows
 	 * will be limited starting at the end of the range.
-	 * @param reverse return results starting at the end of the range in reverse order
+	 * @param reverse return results starting at the end of the range in reverse order.
+	 *  Reading ranges in reverse is supported natively by the database and should
+	 *  have minimal extra cost.
 	 *
 	 * @return a handle to access the results of the asynchronous call
 	 */
@@ -293,7 +299,9 @@ public interface ReadTransaction extends ReadTransactionContext {
 	 * <i>first</i> keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query
 	 * should not limit the number of results. If {@code reverse} is {@code true} rows
 	 * will be limited starting at the end of the range.
-	 * @param reverse return results starting at the end of the range in reverse order
+	 * @param reverse return results starting at the end of the range in reverse order.
+	 *  Reading ranges in reverse is supported natively by the database and should
+	 *  have minimal extra cost.
 	 * @param mode provide a hint about how the results are to be used. This
 	 * can provide speed improvements or efficiency gains based on the caller's
 	 * knowledge of the upcoming access pattern.
@@ -369,7 +377,9 @@ public interface ReadTransaction extends ReadTransactionContext {
 	 * <i>first</i> keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query
 	 * should not limit the number of results. If {@code reverse} is {@code true} rows
 	 * will be limited starting at the end of the range.
-	 * @param reverse return results starting at the end of the range in reverse order
+	 * @param reverse return results starting at the end of the range in reverse order.
+	 *  Reading ranges in reverse is supported natively by the database and should
+	 *  have minimal extra cost.
 	 *
 	 * @return a handle to access the results of the asynchronous call
 	 */
@@ -393,7 +403,9 @@ public interface ReadTransaction extends ReadTransactionContext {
 	 * <i>first</i> keys in the range. Pass {@link #ROW_LIMIT_UNLIMITED} if this query
 	 * should not limit the number of results. If {@code reverse} is {@code true} rows
 	 * will be limited starting at the end of the range.
-	 * @param reverse return results starting at the end of the range in reverse order
+	 * @param reverse return results starting at the end of the range in reverse order.
+	 *  Reading ranges in reverse is supported natively by the database and should
+	 *  have minimal extra cost.
 	 * @param mode provide a hint about how the results are to be used. This
 	 * can provide speed improvements or efficiency gains based on the caller's
 	 * knowledge of the upcoming access pattern.

@@ -530,8 +530,7 @@ Applications must provide error handling and an appropriate retry loop around th
 |snapshot|

 ``reverse``
-    If non-zero, key-value pairs will be returned in reverse lexicographical order beginning at the end of the range.
+    If non-zero, key-value pairs will be returned in reverse lexicographical order beginning at the end of the range. Reading ranges in reverse is supported natively by the database and should have minimal extra cost.

 .. type:: FDBStreamingMode

@@ -287,7 +287,7 @@ A |database-blurb1| |database-blurb2|

     If ``limit`` is specified, then only the first ``limit`` keys (and their values) in the range will be returned.

-    If ``reverse`` is True, then the last ``limit`` keys in the range will be returned in reverse order.
+    If ``reverse`` is True, then the last ``limit`` keys in the range will be returned in reverse order. Reading ranges in reverse is supported natively by the database and should have minimal extra cost.

     If ``streaming_mode`` is specified, it must be a value from the :data:`StreamingMode` enumeration. It provides a hint to FoundationDB about how to retrieve the specified range. This option should generally not be specified, allowing FoundationDB to retrieve the full range very efficiently.

@@ -503,7 +503,7 @@ Reading data

     If ``limit`` is specified, then only the first ``limit`` keys (and their values) in the range will be returned.

-    If ``reverse`` is True, then the last ``limit`` keys in the range will be returned in reverse order.
+    If ``reverse`` is True, then the last ``limit`` keys in the range will be returned in reverse order. Reading ranges in reverse is supported natively by the database and should have minimal extra cost.

     If ``streaming_mode`` is specified, it must be a value from the :data:`StreamingMode` enumeration. It provides a hint to FoundationDB about how the returned container is likely to be used. The default is :data:`StreamingMode.iterator`.

@@ -285,7 +285,7 @@ A |database-blurb1| |database-blurb2|
     Only the first ``limit`` keys (and their values) in the range will be returned.

 ``:reverse``
-    If ``true``, then the keys in the range will be returned in reverse order.
+    If ``true``, then the keys in the range will be returned in reverse order. Reading ranges in reverse is supported natively by the database and should have minimal extra cost.

     If ``:limit`` is also specified, the *last* ``limit`` keys in the range will be returned in reverse order.

@@ -463,7 +463,7 @@ Reading data
     Only the first ``limit`` keys (and their values) in the range will be returned.

 ``:reverse``
-    If true, then the keys in the range will be returned in reverse order.
+    If ``true``, then the keys in the range will be returned in reverse order. Reading ranges in reverse is supported natively by the database and should have minimal extra cost.

     If ``:limit`` is also specified, the *last* ``limit`` keys in the range will be returned in reverse order.

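Note: the binding and documentation hunks above all state the same contract — with ``reverse`` set and a row limit, the *last* ``limit`` keys in the range come back, in descending key order, and this is supported natively with minimal extra cost. The following is a minimal C++ sketch of that contract using a plain std::map in place of the database (no FoundationDB client code involved; getRange here is a hypothetical helper, not a library call):

    #include <cassert>
    #include <map>
    #include <string>
    #include <vector>

    // Emulate a range read over [begin, end) with the documented reverse/limit
    // semantics. A limit of 0 means "unlimited" in this sketch.
    std::vector<std::pair<std::string, std::string>> getRange(
        const std::map<std::string, std::string>& kv,
        const std::string& begin, const std::string& end,
        int limit, bool reverse) {
        std::vector<std::pair<std::string, std::string>> out;
        auto lo = kv.lower_bound(begin);
        auto hi = kv.lower_bound(end);
        if (!reverse) {
            for (auto it = lo; it != hi && (limit == 0 || (int)out.size() < limit); ++it)
                out.push_back(*it);
        } else {
            // Reverse: iterate from the end of the range, so a non-zero limit
            // keeps the last `limit` keys, returned in descending order.
            for (auto it = hi; it != lo && (limit == 0 || (int)out.size() < limit); )
                out.push_back(*--it);
        }
        return out;
    }

    int main() {
        std::map<std::string, std::string> kv{{"a", "1"}, {"b", "2"}, {"c", "3"}, {"d", "4"}};
        auto r = getRange(kv, "a", "z", 2, /*reverse=*/true);
        assert(r.size() == 2 && r[0].first == "d" && r[1].first == "c"); // last 2 keys, descending
        return 0;
    }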
@@ -5,6 +5,11 @@ Release Notes
 6.2.16
 ======

+Performance
+-----------
+
+* Reverse range reads could read too much data from disk, resulting in poor performance relative to forward range reads. `(PR #2650) <https://github.com/apple/foundationdb/pull/2650>`_.
+
 Fixes
 -----

@@ -1600,9 +1600,9 @@ ACTOR Future<Void> timeWarning( double when, const char* msg ) {
 	return Void();
 }

-ACTOR Future<Void> checkStatus(Future<Void> f, Reference<ClusterConnectionFile> clusterFile, bool displayDatabaseAvailable = true) {
+ACTOR Future<Void> checkStatus(Future<Void> f, Database db, bool displayDatabaseAvailable = true) {
 	wait(f);
-	StatusObject s = wait(StatusClient::statusFetcher(clusterFile));
+	StatusObject s = wait(StatusClient::statusFetcher(db));
 	printf("\n");
 	printStatus(s, StatusClient::MINIMAL, displayDatabaseAvailable);
 	printf("\n");
@@ -1644,7 +1644,7 @@ ACTOR Future<bool> configure( Database db, std::vector<StringRef> tokens, Refere

 	state Optional<ConfigureAutoResult> conf;
 	if( tokens[startToken] == LiteralStringRef("auto") ) {
-		StatusObject s = wait( makeInterruptable(StatusClient::statusFetcher( ccf )) );
+		StatusObject s = wait( makeInterruptable(StatusClient::statusFetcher( db )) );
 		if(warn.isValid())
 			warn.cancel();

@@ -2061,7 +2061,7 @@ ACTOR Future<bool> exclude( Database db, std::vector<StringRef> tokens, Referenc
 	}

 	if(!force) {
-		StatusObject status = wait( makeInterruptable( StatusClient::statusFetcher( ccf ) ) );
+		StatusObject status = wait( makeInterruptable( StatusClient::statusFetcher( db ) ) );

 		state std::string errorString = "ERROR: Could not calculate the impact of this exclude on the total free space in the cluster.\n"
 			"Please try the exclude again in 30 seconds.\n"
@@ -2636,7 +2636,7 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {

 	if (!opt.exec.present()) {
 		if(opt.initialStatusCheck) {
-			Future<Void> checkStatusF = checkStatus(Void(), db->getConnectionFile());
+			Future<Void> checkStatusF = checkStatus(Void(), db);
 			wait(makeInterruptable(success(checkStatusF)));
 		}
 		else {
@@ -2674,7 +2674,7 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
 			linenoise.historyAdd(line);
 		}

-		warn = checkStatus(timeWarning(5.0, "\nWARNING: Long delay (Ctrl-C to interrupt)\n"), db->getConnectionFile());
+		warn = checkStatus(timeWarning(5.0, "\nWARNING: Long delay (Ctrl-C to interrupt)\n"), db);

 		try {
 			state UID randomID = deterministicRandom()->randomUniqueID();
@@ -2819,7 +2819,7 @@ ACTOR Future<int> cli(CLIOptions opt, LineNoise* plinenoise) {
 					continue;
 				}

-				StatusObject s = wait(makeInterruptable(StatusClient::statusFetcher(db->getConnectionFile())));
+				StatusObject s = wait(makeInterruptable(StatusClient::statusFetcher(db)));

 				if (!opt.exec.present()) printf("\n");
 				printStatus(s, level);

@@ -1959,8 +1959,8 @@ public:
 		}

 		if (!g_network->isSimulated() && !forceAction) {
-			state StatusObject srcStatus = wait(StatusClient::statusFetcher(backupAgent->taskBucket->src->getConnectionFile()));
-			StatusObject destStatus = wait(StatusClient::statusFetcher(dest->getConnectionFile()));
+			state StatusObject srcStatus = wait(StatusClient::statusFetcher(backupAgent->taskBucket->src));
+			StatusObject destStatus = wait(StatusClient::statusFetcher(dest));
 			checkAtomicSwitchOverConfig(srcStatus, destStatus, tagName);
 		}

@@ -191,6 +191,10 @@ public:
 	Future<Void> clientInfoMonitor;
 	Future<Void> connected;

+	Reference<AsyncVar<Optional<ClusterInterface>>> statusClusterInterface;
+	Future<Void> statusLeaderMon;
+	double lastStatusFetch;
+
 	int apiVersion;

 	int mvCacheInsertLocation;

@@ -46,6 +46,7 @@ ClientKnobs::ClientKnobs(bool randomize) {
 	init( CLIENT_EXAMPLE_AMOUNT, 20 );
 	init( MAX_CLIENT_STATUS_AGE, 1.0 );
 	init( MAX_PROXY_CONNECTIONS, 5 ); if( randomize && BUGGIFY ) MAX_PROXY_CONNECTIONS = 1;
+	init( STATUS_IDLE_TIMEOUT, 120.0 );

 	// wrong_shard_server sometimes comes from the only nonfailed server, so we need to avoid a fast spin

@@ -45,6 +45,7 @@ public:
 	int CLIENT_EXAMPLE_AMOUNT;
 	double MAX_CLIENT_STATUS_AGE;
 	int MAX_PROXY_CONNECTIONS;
+	double STATUS_IDLE_TIMEOUT;

 	// wrong_shard_server sometimes comes from the only nonfailed server, so we need to avoid a fast spin
 	double WRONG_SHARD_SERVER_DELAY; // SOMEDAY: This delay can limit performance of retrieving data when the cache is mostly wrong (e.g. dumping the database after a test)

@@ -1165,8 +1165,8 @@ Optional<Value> getValueFromJSON(StatusObject statusObj) {
 	}
 }

-ACTOR Future<Optional<Value>> getJSON(Reference<ClusterConnectionFile> clusterFile) {
-	StatusObject statusObj = wait(StatusClient::statusFetcher(clusterFile));
+ACTOR Future<Optional<Value>> getJSON(Database db) {
+	StatusObject statusObj = wait(StatusClient::statusFetcher(db));
 	return getValueFromJSON(statusObj);
 }

@@ -1194,7 +1194,7 @@ Future< Optional<Value> > ReadYourWritesTransaction::get( const Key& key, bool s

 	if (key == LiteralStringRef("\xff\xff/status/json")){
 		if (tr.getDatabase().getPtr() && tr.getDatabase()->getConnectionFile()) {
-			return getJSON(tr.getDatabase()->getConnectionFile());
+			return getJSON(tr.getDatabase());
 		}
 		else {
 			return Optional<Value>();

@@ -452,7 +452,7 @@ StatusObject getClientDatabaseStatus(StatusObjectReader client, StatusObjectRead
 	return databaseStatus;
 }

-ACTOR Future<StatusObject> statusFetcherImpl( Reference<ClusterConnectionFile> f ) {
+ACTOR Future<StatusObject> statusFetcherImpl( Reference<ClusterConnectionFile> f, Reference<AsyncVar<Optional<ClusterInterface>>> clusterInterface) {
 	if (!g_network) throw network_not_setup();

 	state StatusObject statusObj;
@@ -462,13 +462,10 @@ ACTOR Future<StatusObject> statusFetcherImpl( Reference<ClusterConnectionFile> f
 	// This could be read from the JSON but doing so safely is ugly so using a real var.
 	state bool quorum_reachable = false;
 	state int coordinatorsFaultTolerance = 0;
-	state Reference<AsyncVar<Optional<ClusterInterface>>> clusterInterface(new AsyncVar<Optional<ClusterInterface>>);

 	try {
 		state int64_t clientTime = time(0);

-		state Future<Void> leaderMon = monitorLeader<ClusterInterface>(f, clusterInterface);
-
 		StatusObject _statusObjClient = wait(clientStatusFetcher(f, &clientMessages, &quorum_reachable, &coordinatorsFaultTolerance));
 		statusObjClient = _statusObjClient;

@@ -548,6 +545,23 @@ ACTOR Future<StatusObject> statusFetcherImpl( Reference<ClusterConnectionFile> f
 	return statusObj;
 }

-Future<StatusObject> StatusClient::statusFetcher( Reference<ClusterConnectionFile> clusterFile ) {
-	return statusFetcherImpl(clusterFile);
+ACTOR Future<Void> timeoutMonitorLeader(Database db) {
+	state Future<Void> leadMon = monitorLeader<ClusterInterface>(db->getConnectionFile(), db->statusClusterInterface);
+	loop {
+		wait(delay(CLIENT_KNOBS->STATUS_IDLE_TIMEOUT + 0.00001 + db->lastStatusFetch - now()));
+		if(now() - db->lastStatusFetch > CLIENT_KNOBS->STATUS_IDLE_TIMEOUT) {
+			db->statusClusterInterface = Reference<AsyncVar<Optional<ClusterInterface>>>();
+			return Void();
+		}
+	}
+}
+
+Future<StatusObject> StatusClient::statusFetcher( Database db ) {
+	db->lastStatusFetch = now();
+	if(!db->statusClusterInterface) {
+		db->statusClusterInterface = Reference<AsyncVar<Optional<ClusterInterface>>>(new AsyncVar<Optional<ClusterInterface>>);
+		db->statusLeaderMon = timeoutMonitorLeader(db);
+	}
+
+	return statusFetcherImpl(db->getConnectionFile(), db->statusClusterInterface);
 }

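Note: the hunk above changes statusFetcher to take a Database and to lazily share one leader monitor per Database: the monitor is created on the first status fetch and timeoutMonitorLeader tears it down once no fetch has happened for STATUS_IDLE_TIMEOUT seconds, so an idle client stops polling the coordinators. A minimal sketch of that lazy-create / idle-reap pattern outside the flow actor framework, with hypothetical names (LeaderMonitor, StatusCache) standing in for the real types:

    #include <chrono>
    #include <memory>

    struct LeaderMonitor { /* stands in for the monitorLeader machinery */ };
    constexpr double kStatusIdleTimeout = 120.0; // mirrors the STATUS_IDLE_TIMEOUT knob

    struct StatusCache {
        std::shared_ptr<LeaderMonitor> monitor; // created lazily, like db->statusClusterInterface
        double lastFetch = -1e9;                // like db->lastStatusFetch

        static double now() {
            using namespace std::chrono;
            return duration<double>(steady_clock::now().time_since_epoch()).count();
        }

        // Called on every status fetch: start the monitor on first use and
        // remember when it was last needed.
        LeaderMonitor& acquire() {
            lastFetch = now();
            if (!monitor) monitor = std::make_shared<LeaderMonitor>();
            return *monitor;
        }

        // Called periodically (the actor loops on a delay): drop the monitor
        // once it has been idle longer than the timeout.
        void reapIfIdle() {
            if (monitor && now() - lastFetch > kStatusIdleTimeout) monitor.reset();
        }
    };

    int main() {
        StatusCache cache;
        cache.acquire();    // first fetch creates the monitor
        cache.reapIfIdle(); // a later sweep frees it once the timeout has elapsed
        return 0;
    }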
@@ -23,11 +23,12 @@

 #include "flow/flow.h"
 #include "fdbclient/Status.h"
+#include "fdbclient/DatabaseContext.h"

 class StatusClient {
 public:
 	enum StatusLevel { MINIMAL = 0, NORMAL = 1, DETAILED = 2, JSON = 3 };
-	static Future<StatusObject> statusFetcher(Reference<ClusterConnectionFile> clusterFile);
+	static Future<StatusObject> statusFetcher(Database db);
 };

 #endif
@@ -191,6 +191,7 @@ struct YieldMockNetwork : INetwork, ReferenceCounted<YieldMockNetwork> {
 	virtual TaskPriority getCurrentTask() { return baseNetwork->getCurrentTask(); }
 	virtual void setCurrentTask(TaskPriority taskID) { baseNetwork->setCurrentTask(taskID); }
 	virtual double now() { return baseNetwork->now(); }
+	virtual double timer() { return baseNetwork->timer(); }
 	virtual void stop() { return baseNetwork->stop(); }
 	virtual bool isSimulated() const { return baseNetwork->isSimulated(); }
 	virtual void onMainThread(Promise<Void>&& signal, TaskPriority taskID) { return baseNetwork->onMainThread(std::move(signal), taskID); }

@@ -1118,7 +1118,7 @@ void FlowTransport::removePeerReference(const Endpoint& endpoint, bool isStream)
 			.detail("Address", endpoint.getPrimaryAddress())
 			.detail("Token", endpoint.token);
 	}
-	if(peer->peerReferences == 0 && peer->reliable.empty() && peer->unsent.empty() && peer->outstandingReplies==0) {
+	if(peer->peerReferences == 0 && peer->reliable.empty() && peer->unsent.empty() && peer->outstandingReplies==0 && peer->lastDataPacketSentTime < now() - FLOW_KNOBS->CONNECTION_MONITOR_UNREFERENCED_CLOSE_DELAY) {
 		peer->resetPing.trigger();
 	}
 }

@@ -129,8 +129,7 @@ public:
 	std::vector<LocalityEntry> const& getEntries() const
 		{ return _entryArray; }

-	std::vector<LocalityEntry> const& getMutableEntries() const
-		{ return _mutableEntryArray; }
+	std::vector<LocalityEntry>& getMutableEntries() { return _mutableEntryArray; }

 	std::vector<LocalityEntry> const& getGroupEntries() const
 		{ return _localitygroup->_entryArray; }
@@ -253,7 +252,7 @@ public:

 	while (nRandomItems > 0)
 	{
-		if (nItemsLeft <= 0) {
+		if (nRandomItems > nItemsLeft || nItemsLeft <= 0) {
 			bComplete = false;
 			break;
 		}
@@ -479,6 +478,8 @@ public:

 	Reference<StringToIntMap> _keymap;

+	virtual std::vector<std::vector<AttribValue>> const& getKeyValueArray() const { return _keyValueArray; }
+
 protected:
 	virtual Reference<StringToIntMap>& getGroupValueMap()
 		{ return _localitygroup->getGroupValueMap(); }

@@ -119,6 +119,8 @@ struct PolicyAcross : IReplicationPolicy, public ReferenceCounted<PolicyAcross>
 	explicit PolicyAcross(const PolicyAcross& other) : PolicyAcross(other._count, other._attribKey, other._policy) {}
 	virtual ~PolicyAcross();
 	virtual std::string name() const { return "Across"; }
+	std::string embeddedPolicyName() const { return _policy->name(); }
+	int getCount() const { return _count; }
 	virtual std::string info() const { return format("%s^%d x ", _attribKey.c_str(), _count) + _policy->info(); }
 	virtual int maxResults() const { return _count * _policy->maxResults(); }
 	virtual int depth() const { return 1 + _policy->depth(); }

@@ -82,14 +82,55 @@ double ratePolicy(
 	return rating;
 }

-bool findBestPolicySet(
-	std::vector<LocalityEntry>& bestResults,
-	Reference<LocalitySet> & localitySet,
-	Reference<IReplicationPolicy> const& policy,
-	unsigned int nMinItems,
-	unsigned int nSelectTests,
-	unsigned int nPolicyTests)
-{
+bool findBestPolicySetSimple(PolicyAcross* pa, Reference<LocalitySet> logServerSet, std::vector<LocalityEntry>& bestSet,
+                             int desired) {
+	auto& mutableEntries = logServerSet->getMutableEntries();
+	deterministicRandom()->randomShuffle(mutableEntries);
+	// First make sure the current localitySet is able to fulfuill the policy
+	std::set<std::string> attributeKeys;
+	AttribKey indexKey = logServerSet->keyIndex(*attributeKeys.begin());
+	int uniqueValueCount = logServerSet->getKeyValueArray()[indexKey._id].size();
+	int targetUniqueValueCount = pa->getCount();
+	bool found = false;
+	if (uniqueValueCount < targetUniqueValueCount) {
+		// logServerSet won't be able to fulfill the policy
+		found = false;
+	} else {
+		// Loop through all servers and, in each loop, try to choose `targetUniqueValueCount`
+		// servers, each of which has a unique attribute value
+		std::set<AttribValue> seen;
+		int upperBound = mutableEntries.size();
+		int i = 0;
+		while (bestSet.size() < desired) {
+			auto& item = mutableEntries[i];
+			Optional<AttribValue> value = logServerSet->getRecord(item._id)->getValue(indexKey);
+			if (value.present() && seen.find(value.get()) == seen.end()) {
+				seen.insert(value.get());
+				bestSet.push_back(item);
+				upperBound--;
+				if (i < upperBound) {
+					std::swap(mutableEntries[i], mutableEntries[upperBound]);
+				}
+				if (seen.size() == targetUniqueValueCount) {
+					seen.clear();
+					i = 0;
+				}
+				continue;
+			}
+			i++;
+			if (i == upperBound && bestSet.size() < desired) {
+				seen.clear();
+				i = 0;
+			}
+		}
+		found = true;
+	}
+	return found;
+}
+
+bool findBestPolicySetExpensive(std::vector<LocalityEntry>& bestResults, Reference<LocalitySet>& localitySet,
+                                Reference<IReplicationPolicy> const& policy, unsigned int nMinItems,
+                                unsigned int nSelectTests, unsigned int nPolicyTests) {
 	bool bSucceeded = true;
 	Reference<LocalitySet> bestLocalitySet, testLocalitySet;
 	std::vector<LocalityEntry> results;
@@ -113,9 +154,7 @@ bool findBestPolicySet(
 		}

 		// Get some additional random items, if needed
-		if ((nMinItems > results.size()) &&
-			(!localitySet->random(results, results, nMinItems-results.size())))
-		{
+		if ((nMinItems > results.size()) && (!localitySet->random(results, results, nMinItems - results.size()))) {
 			bSucceeded = false;
 			break;
 		}
@@ -158,6 +197,55 @@ bool findBestPolicySet(
 	return bSucceeded;
 }

+bool findBestPolicySet(std::vector<LocalityEntry>& bestResults, Reference<LocalitySet>& localitySet,
+                       Reference<IReplicationPolicy> const& policy, unsigned int nMinItems, unsigned int nSelectTests,
+                       unsigned int nPolicyTests) {
+
+	bool bestFound = false;
+
+	// Specialization for policies of shape:
+	//  - PolicyOne()
+	//  - PolicyAcross(,"zoneId",PolicyOne())
+	//  - TODO: More specializations for common policies
+	if (policy->name() == "One") {
+		bestFound = true;
+		int count = 0;
+		auto& mutableEntries = localitySet->getMutableEntries();
+		deterministicRandom()->randomShuffle(mutableEntries);
+		for (auto const& entry : mutableEntries) {
+			bestResults.push_back(entry);
+			if (++count == nMinItems) break;
+		}
+	} else if (policy->name() == "Across") {
+		PolicyAcross* pa = (PolicyAcross*)policy.getPtr();
+		std::set<std::string> attributeKeys;
+		pa->attributeKeys(&attributeKeys);
+		if (pa->embeddedPolicyName() == "One" && attributeKeys.size() == 1 &&
+		    *attributeKeys.begin() == "zoneId" // This algorithm can actually apply to any field
+		) {
+			bestFound = findBestPolicySetSimple(pa, localitySet, bestResults, nMinItems);
+			if (bestFound && g_network->isSimulated()) {
+				std::vector<LocalityEntry> oldBest;
+				auto oldBestFound =
+				    findBestPolicySetExpensive(oldBest, localitySet, policy, nMinItems, nSelectTests, nPolicyTests);
+				if (!oldBestFound) {
+					TraceEvent(SevError, "FBPSMissmatch").detail("Policy", policy->info());
+				} else {
+					auto set = localitySet->restrict(bestResults);
+					auto oldSet = localitySet->restrict(oldBest);
+					ASSERT_WE_THINK(ratePolicy(set, policy, nPolicyTests) <= ratePolicy(oldSet, policy, nPolicyTests));
+				}
+			}
+		} else {
+			bestFound =
+			    findBestPolicySetExpensive(bestResults, localitySet, policy, nMinItems, nSelectTests, nPolicyTests);
+		}
+	} else {
+		bestFound = findBestPolicySetExpensive(bestResults, localitySet, policy, nMinItems, nSelectTests, nPolicyTests);
+	}
+	return bestFound;
+}
+
 bool findBestUniquePolicySet(
 	std::vector<LocalityEntry>& bestResults,
 	Reference<LocalitySet> & localitySet,
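Note: the two hunks above split the old randomized search into findBestPolicySetExpensive and add a fast path for the common PolicyAcross(N, "zoneId", PolicyOne()) shape — shuffle the candidates, greedily take entries whose zone has not been seen yet, and reset the seen set each time N distinct zones have been collected, instead of rating many random candidate sets. The following is a self-contained C++ sketch of that greedy idea over plain strings; Candidate and pickAcrossZones are illustrative names, not the fdbrpc types:

    #include <algorithm>
    #include <random>
    #include <set>
    #include <string>
    #include <vector>

    // Candidate process with a single locality attribute (e.g. its zoneId).
    struct Candidate { std::string id, zone; };

    // Pick up to `desired` candidates so that each consecutive group of
    // `perTeam` picks spans `perTeam` distinct zones.
    std::vector<Candidate> pickAcrossZones(std::vector<Candidate> pool, size_t perTeam, size_t desired) {
        std::vector<Candidate> picked;
        std::set<std::string> zonesInPool;
        for (auto& c : pool) zonesInPool.insert(c.zone);
        if (perTeam == 0 || zonesInPool.size() < perTeam) return picked; // policy cannot be satisfied
        desired = std::min(desired, pool.size());

        std::mt19937 rng(std::random_device{}());
        std::shuffle(pool.begin(), pool.end(), rng);

        std::set<std::string> seen;                // zones used by the team being built
        size_t i = 0, upperBound = pool.size();    // [0, upperBound) is the unused window
        while (picked.size() < desired && upperBound > 0) {
            if (seen.insert(pool[i].zone).second) {
                picked.push_back(pool[i]);
                // Swap the used entry out of the search window so it is not picked twice.
                --upperBound;
                if (i < upperBound) std::swap(pool[i], pool[upperBound]);
                if (seen.size() == perTeam || i >= upperBound) { seen.clear(); i = 0; }
                continue;
            }
            ++i;
            if (i >= upperBound) { seen.clear(); i = 0; } // pass exhausted, start the next team
        }
        return picked;
    }

The point of the fast path is that it touches each candidate roughly once, where the expensive path rates nPolicyTests random selections per generation; the simulation-only cross-check in the diff asserts the cheap answer is never rated worse than the old one.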
@@ -751,6 +751,12 @@ public:
 	// Everything actually network related is delegated to the Sim2Net class; Sim2 is only concerned with simulating machines and time
 	virtual double now() { return time; }

+	// timer() can be up to one second ahead of now()
+	virtual double timer() {
+		timerTime += deterministicRandom()->random01()*(time+1.0-timerTime)/2.0;
+		return timerTime;
+	}
+
 	virtual Future<class Void> delay( double seconds, TaskPriority taskID ) {
 		ASSERT(taskID >= TaskPriority::Min && taskID <= TaskPriority::Max);
 		return delay( seconds, taskID, currentProcess );
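Note: the simulator now gives timer() its own clock, timerTime, which drifts randomly ahead of now() but never by more than a second, and which is clamped up to the simulated time whenever the simulation advances (see the later Sim2 hunks). A small standalone C++ sketch of just that update rule and its invariants, with hypothetical names (SimClocks, advanceTo) in place of the real Sim2 plumbing:

    #include <algorithm>
    #include <cassert>
    #include <random>

    struct SimClocks {
        double time = 0.0;      // what now() returns
        double timerTime = 0.0; // what timer() returns
        std::mt19937 rng{42};

        double random01() { return std::uniform_real_distribution<double>(0.0, 1.0)(rng); }

        // Mirrors Sim2::timer(): nudge timerTime part of the way toward time + 1.0.
        double timer() {
            timerTime += random01() * (time + 1.0 - timerTime) / 2.0;
            return timerTime;
        }

        // Mirrors the clamp applied when the simulation advances to a new task.
        void advanceTo(double t) {
            time = t;
            timerTime = std::max(timerTime, time);
        }
    };

    int main() {
        SimClocks c;
        double prev = 0.0;
        for (int step = 0; step < 1000; ++step) {
            c.advanceTo(c.time + 0.01);
            double t = c.timer();
            assert(t >= prev);         // timer() never goes backwards
            assert(t >= c.time);       // never behind now()
            assert(t <= c.time + 1.0); // at most one second ahead of now()
            prev = t;
        }
        return 0;
    }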
@@ -1588,7 +1594,7 @@ public:
 		machines.erase(machineId);
 	}

-	Sim2() : time(0.0), taskCount(0), yielded(false), yield_limit(0), currentTaskID(TaskPriority::Zero) {
+	Sim2() : time(0.0), timerTime(0.0), taskCount(0), yielded(false), yield_limit(0), currentTaskID(TaskPriority::Zero) {
 		// Not letting currentProcess be NULL eliminates some annoying special cases
 		currentProcess = new ProcessInfo("NoMachine", LocalityData(Optional<Standalone<StringRef>>(), StringRef(), StringRef(), StringRef()), ProcessClass(), {NetworkAddress()}, this, "", "");
 		g_network = net2 = newNet2(false, true);
@@ -1624,6 +1630,7 @@ public:
 		else {
 			mutex.enter();
 			this->time = t.time;
+			this->timerTime = std::max(this->timerTime, this->time);
 			mutex.leave();

 			this->currentProcess = t.machine;
@@ -1676,6 +1683,7 @@ public:
 	//time is guarded by ISimulator::mutex. It is not necessary to guard reads on the main thread because
 	//time should only be modified from the main thread.
 	double time;
+	double timerTime;
 	TaskPriority currentTaskID;

 	//taskCount is guarded by ISimulator::mutex
@@ -1718,7 +1726,7 @@ ACTOR void doReboot( ISimulator::ProcessInfo *p, ISimulator::KillType kt ) {
 	TEST( kt == ISimulator::RebootAndDelete ); // Simulated machine rebooted with data and coordination state deletion
 	TEST( kt == ISimulator::RebootProcessAndDelete ); // Simulated process rebooted with data and coordination state deletion

-	if( p->rebooting )
+	if( p->rebooting || !p->isReliable() )
 		return;
 	TraceEvent("RebootingProcess").detail("KillType", kt).detail("Address", p->address).detail("ZoneId", p->locality.zoneId()).detail("DataHall", p->locality.dataHallId()).detail("Locality", p->locality.toString()).detail("Failed", p->failed).detail("Excluded", p->excluded).detail("Cleared", p->cleared).backtrace();
 	p->rebooting = true;

@@ -51,25 +51,23 @@ struct WorkerInfo : NonCopyable {
 	ReplyPromise<RegisterWorkerReply> reply;
 	Generation gen;
 	int reboots;
-	double lastAvailableTime;
 	ProcessClass initialClass;
 	ClusterControllerPriorityInfo priorityInfo;
 	WorkerDetails details;
 	Future<Void> haltRatekeeper;
 	Future<Void> haltDistributor;

-	WorkerInfo() : gen(-1), reboots(0), lastAvailableTime(now()), priorityInfo(ProcessClass::UnsetFit, false, ClusterControllerPriorityInfo::FitnessUnknown) {}
+	WorkerInfo() : gen(-1), reboots(0), priorityInfo(ProcessClass::UnsetFit, false, ClusterControllerPriorityInfo::FitnessUnknown) {}
 	WorkerInfo( Future<Void> watcher, ReplyPromise<RegisterWorkerReply> reply, Generation gen, WorkerInterface interf, ProcessClass initialClass, ProcessClass processClass, ClusterControllerPriorityInfo priorityInfo, bool degraded ) :
-		watcher(watcher), reply(reply), gen(gen), reboots(0), lastAvailableTime(now()), initialClass(initialClass), priorityInfo(priorityInfo), details(interf, processClass, degraded) {}
+		watcher(watcher), reply(reply), gen(gen), reboots(0), initialClass(initialClass), priorityInfo(priorityInfo), details(interf, processClass, degraded) {}

 	WorkerInfo( WorkerInfo&& r ) BOOST_NOEXCEPT : watcher(std::move(r.watcher)), reply(std::move(r.reply)), gen(r.gen),
-		reboots(r.reboots), lastAvailableTime(r.lastAvailableTime), initialClass(r.initialClass), priorityInfo(r.priorityInfo), details(std::move(r.details)) {}
+		reboots(r.reboots), initialClass(r.initialClass), priorityInfo(r.priorityInfo), details(std::move(r.details)) {}
 	void operator=( WorkerInfo&& r ) BOOST_NOEXCEPT {
 		watcher = std::move(r.watcher);
 		reply = std::move(r.reply);
 		gen = r.gen;
 		reboots = r.reboots;
-		lastAvailableTime = r.lastAvailableTime;
 		initialClass = r.initialClass;
 		priorityInfo = r.priorityInfo;
 		details = std::move(r.details);
@@ -339,7 +337,8 @@ public:
 		std::vector<LocalityData> tLocalities;

 		// Try to find the best team of servers to fulfill the policy
-		if (findBestPolicySet(bestSet, logServerSet, policy, desired, SERVER_KNOBS->POLICY_RATING_TESTS, SERVER_KNOBS->POLICY_GENERATIONS)) {
+		if (findBestPolicySet(bestSet, logServerSet, policy, desired, SERVER_KNOBS->POLICY_RATING_TESTS,
+		                      SERVER_KNOBS->POLICY_GENERATIONS)) {
 			results.reserve(results.size() + bestSet.size());
 			for (auto& entry : bestSet) {
 				auto object = logServerMap->getObject(entry);
@@ -381,8 +380,6 @@ public:
 		TraceEvent("GetTLogTeamDone").detail("Completed", bCompleted).detail("Policy", policy->info()).detail("Results", results.size()).detail("Processes", logServerSet->size()).detail("Workers", id_worker.size())
 			.detail("Required", required).detail("Desired", desired).detail("RatingTests",SERVER_KNOBS->POLICY_RATING_TESTS).detail("PolicyGenerations",SERVER_KNOBS->POLICY_GENERATIONS);

-		logServerSet->clear();
-		logServerSet.clear();

 		return results;
 	}
|
@ -395,7 +392,7 @@ public:
|
||||||
if(satelliteFallback || region.satelliteTLogUsableDcsFallback == 0) {
|
if(satelliteFallback || region.satelliteTLogUsableDcsFallback == 0) {
|
||||||
throw no_more_servers();
|
throw no_more_servers();
|
||||||
} else {
|
} else {
|
||||||
if(now() - startTime < SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY) {
|
if(!goodRecruitmentTime.isReady()) {
|
||||||
throw operation_failed();
|
throw operation_failed();
|
||||||
}
|
}
|
||||||
satelliteFallback = true;
|
satelliteFallback = true;
|
||||||
|
@@ -641,18 +638,8 @@ public:
 			result.logRouters.push_back(logRouters[i].interf);
 		}

-		if(!remoteStartTime.present()) {
-			double maxAvailableTime = 0;
-			for(auto& it : result.remoteTLogs) {
-				maxAvailableTime = std::max(maxAvailableTime, id_worker[it.locality.processId()].lastAvailableTime);
-			}
-			for(auto& it : result.logRouters) {
-				maxAvailableTime = std::max(maxAvailableTime, id_worker[it.locality.processId()].lastAvailableTime);
-			}
-			remoteStartTime = maxAvailableTime;
-		}
-
-		if( now() - remoteStartTime.get() < SERVER_KNOBS->WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY &&
+		if( !goodRemoteRecruitmentTime.isReady() &&
 			( ( RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredRemoteLogs(), ProcessClass::TLog).betterCount(RoleFitness(remoteLogs, ProcessClass::TLog)) ) ||
 			  ( RoleFitness(SERVER_KNOBS->EXPECTED_LOG_ROUTER_FITNESS, req.logRouterCount, ProcessClass::LogRouter).betterCount(RoleFitness(logRouters, ProcessClass::LogRouter)) ) ) ) {
 			throw operation_failed();
@@ -729,7 +716,7 @@ public:
 			}
 		}

-		if( now() - startTime < SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY &&
+		if( !goodRecruitmentTime.isReady() &&
 			( RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredLogs(), ProcessClass::TLog).betterCount(RoleFitness(tlogs, ProcessClass::TLog)) ||
 			  ( region.satelliteTLogReplicationFactor > 0 && RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredSatelliteLogs(dcId), ProcessClass::TLog).betterCount(RoleFitness(satelliteLogs, ProcessClass::TLog)) ) ||
 			  RoleFitness(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, req.configuration.getDesiredProxies(), ProcessClass::Proxy).betterCount(RoleFitness(proxies, ProcessClass::Proxy)) ||
@@ -766,7 +753,7 @@ public:
 			}
 			throw no_more_servers();
 		} catch( Error& e ) {
-			if (now() - startTime < SERVER_KNOBS->WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY && regions[1].dcId != clusterControllerDcId.get()) {
+			if (!goodRemoteRecruitmentTime.isReady() && regions[1].dcId != clusterControllerDcId.get()) {
 				throw operation_failed();
 			}

@@ -884,7 +871,7 @@ public:
 			.detail("DesiredProxies", req.configuration.getDesiredProxies()).detail("ActualProxies", result.proxies.size())
 			.detail("DesiredResolvers", req.configuration.getDesiredResolvers()).detail("ActualResolvers", result.resolvers.size());

-		if( now() - startTime < SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY &&
+		if( !goodRecruitmentTime.isReady() &&
 			( RoleFitness(SERVER_KNOBS->EXPECTED_TLOG_FITNESS, req.configuration.getDesiredLogs(), ProcessClass::TLog).betterCount(RoleFitness(tlogs, ProcessClass::TLog)) ||
 			  RoleFitness(SERVER_KNOBS->EXPECTED_PROXY_FITNESS, req.configuration.getDesiredProxies(), ProcessClass::Proxy).betterCount(bestFitness.proxy) ||
 			  RoleFitness(SERVER_KNOBS->EXPECTED_RESOLVER_FITNESS, req.configuration.getDesiredResolvers(), ProcessClass::Resolver).betterCount(bestFitness.resolver) ) ) {
@@ -1243,11 +1230,13 @@ public:
 	ActorCollection ac;
 	UpdateWorkerList updateWorkerList;
 	Future<Void> outstandingRequestChecker;
+	Future<Void> outstandingRemoteRequestChecker;

 	DBInfo db;
 	Database cx;
 	double startTime;
-	Optional<double> remoteStartTime;
+	Future<Void> goodRecruitmentTime;
+	Future<Void> goodRemoteRecruitmentTime;
 	Version datacenterVersionDifference;
 	bool versionDifferenceUpdated;
 	PromiseStream<Future<Void>> addActor;
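Note: throughout the cluster controller hunks, every "now() - startTime < WAIT_FOR_GOOD_RECRUITMENT_DELAY" (and the remoteStartTime bookkeeping) is replaced by asking whether goodRecruitmentTime or goodRemoteRecruitmentTime isReady(): the "still waiting for good recruitment candidates" window becomes a future that stays Never() until it is armed and becomes ready after a delay, so call sites no longer re-derive the window from timestamps and knobs. The following is a tiny C++ sketch of that shape under stated assumptions — ReadyAfter is a hypothetical stand-in for a Future<Void> armed with a delay, not flow code:

    #include <chrono>

    class ReadyAfter {
        std::chrono::steady_clock::time_point deadline;
        bool armed = false;
    public:
        ReadyAfter() = default; // like Never(): not armed, never ready
        void armFor(double seconds) {
            deadline = std::chrono::steady_clock::now() +
                       std::chrono::duration_cast<std::chrono::steady_clock::duration>(
                           std::chrono::duration<double>(seconds));
            armed = true;
        }
        bool isReady() const { return armed && std::chrono::steady_clock::now() >= deadline; }
    };

    // Call sites then read as in the diff:
    //   if (!goodRecruitmentTime.isReady()) throw operation_failed();
    // rather than comparing now() - startTime against a knob at every check.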
@@ -1271,8 +1260,9 @@ public:

 	ClusterControllerData( ClusterControllerFullInterface const& ccInterface, LocalityData const& locality )
 		: clusterControllerProcessId(locality.processId()), clusterControllerDcId(locality.dcId()),
-		  id(ccInterface.id()), ac(false), outstandingRequestChecker(Void()), gotProcessClasses(false),
-		  gotFullyRecoveredConfig(false), startTime(now()), datacenterVersionDifference(0),
+		  id(ccInterface.id()), ac(false), outstandingRequestChecker(Void()), outstandingRemoteRequestChecker(Void()), gotProcessClasses(false),
+		  gotFullyRecoveredConfig(false), startTime(now()), goodRecruitmentTime(Never()),
+		  goodRemoteRecruitmentTime(Never()), datacenterVersionDifference(0),
 		  versionDifferenceUpdated(false), recruitingDistributor(false), recruitRatekeeper(false),
 		  clusterControllerMetrics("ClusterController", id.toString()),
 		  openDatabaseRequests("OpenDatabaseRequests", clusterControllerMetrics),
@@ -1320,7 +1310,7 @@ ACTOR Future<Void> clusterWatchDatabase( ClusterControllerData* cluster, Cluster
 			id_used[cluster->clusterControllerProcessId]++;
 			state WorkerFitnessInfo masterWorker = cluster->getWorkerForRoleInDatacenter(cluster->clusterControllerDcId, ProcessClass::Master, ProcessClass::NeverAssign, db->config, id_used);
 			if( ( masterWorker.worker.processClass.machineClassFitness( ProcessClass::Master ) > SERVER_KNOBS->EXPECTED_MASTER_FITNESS || masterWorker.worker.interf.locality.processId() == cluster->clusterControllerProcessId )
-				&& now() - cluster->startTime < SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY ) {
+				&& !cluster->goodRecruitmentTime.isReady() ) {
 				TraceEvent("CCWDB", cluster->id).detail("Fitness", masterWorker.worker.processClass.machineClassFitness( ProcessClass::Master ));
 				wait( delay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY) );
 				continue;
@@ -1594,9 +1584,11 @@ void checkBetterDDOrRK(ClusterControllerData* self) {
 ACTOR Future<Void> doCheckOutstandingRequests( ClusterControllerData* self ) {
 	try {
 		wait( delay(SERVER_KNOBS->CHECK_OUTSTANDING_INTERVAL) );
+		while( !self->goodRecruitmentTime.isReady() ) {
+			wait(self->goodRecruitmentTime);
+		}

 		checkOutstandingRecruitmentRequests( self );
-		checkOutstandingRemoteRecruitmentRequests( self );
 		checkOutstandingStorageRequests( self );
 		checkBetterDDOrRK(self);

@@ -1606,7 +1598,23 @@ ACTOR Future<Void> doCheckOutstandingRequests( ClusterControllerData* self ) {
 			TraceEvent("MasterRegistrationKill", self->id).detail("MasterId", self->db.serverInfo->get().read().master.id());
 		}
 	} catch( Error &e ) {
-		if(e.code() != error_code_operation_failed && e.code() != error_code_no_more_servers) {
+		if(e.code() != error_code_no_more_servers) {
+			TraceEvent(SevError, "CheckOutstandingError").error(e);
+		}
+	}
+	return Void();
+}
+
+ACTOR Future<Void> doCheckOutstandingRemoteRequests( ClusterControllerData* self ) {
+	try {
+		wait( delay(SERVER_KNOBS->CHECK_OUTSTANDING_INTERVAL) );
+		while( !self->goodRemoteRecruitmentTime.isReady() ) {
+			wait(self->goodRemoteRecruitmentTime);
+		}
+
+		checkOutstandingRemoteRecruitmentRequests( self );
+	} catch( Error &e ) {
+		if(e.code() != error_code_no_more_servers) {
 			TraceEvent(SevError, "CheckOutstandingError").error(e);
 		}
 	}
@@ -1614,10 +1622,13 @@ ACTOR Future<Void> doCheckOutstandingRequests( ClusterControllerData* self ) {
 }

 void checkOutstandingRequests( ClusterControllerData* self ) {
-	if( !self->outstandingRequestChecker.isReady() )
-		return;
+	if( self->outstandingRemoteRequestChecker.isReady() ) {
+		self->outstandingRemoteRequestChecker = doCheckOutstandingRemoteRequests(self);
+	}

-	self->outstandingRequestChecker = doCheckOutstandingRequests(self);
+	if( self->outstandingRequestChecker.isReady() ) {
+		self->outstandingRequestChecker = doCheckOutstandingRequests(self);
+	}
 }

 ACTOR Future<Void> rebootAndCheck( ClusterControllerData* cluster, Optional<Standalone<StringRef>> processID ) {
|
||||||
auto watcher = cluster->id_worker.find(processID);
|
auto watcher = cluster->id_worker.find(processID);
|
||||||
ASSERT(watcher != cluster->id_worker.end());
|
ASSERT(watcher != cluster->id_worker.end());
|
||||||
|
|
||||||
watcher->second.lastAvailableTime = now();
|
|
||||||
watcher->second.reboots++;
|
watcher->second.reboots++;
|
||||||
wait( delay( g_network->isSimulated() ? SERVER_KNOBS->SIM_SHUTDOWN_TIMEOUT : SERVER_KNOBS->SHUTDOWN_TIMEOUT ) );
|
wait( delay( g_network->isSimulated() ? SERVER_KNOBS->SIM_SHUTDOWN_TIMEOUT : SERVER_KNOBS->SHUTDOWN_TIMEOUT ) );
|
||||||
}
|
}
|
||||||
|
@ -1867,7 +1877,7 @@ ACTOR Future<Void> clusterRecruitFromConfiguration( ClusterControllerData* self,
|
||||||
req.reply.send( rep );
|
req.reply.send( rep );
|
||||||
return Void();
|
return Void();
|
||||||
} catch (Error& e) {
|
} catch (Error& e) {
|
||||||
if (e.code() == error_code_no_more_servers && now() - self->startTime >= SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY) {
|
if (e.code() == error_code_no_more_servers && self->goodRecruitmentTime.isReady()) {
|
||||||
self->outstandingRecruitmentRequests.push_back( req );
|
self->outstandingRecruitmentRequests.push_back( req );
|
||||||
TraceEvent(SevWarn, "RecruitFromConfigurationNotAvailable", self->id).error(e);
|
TraceEvent(SevWarn, "RecruitFromConfigurationNotAvailable", self->id).error(e);
|
||||||
return Void();
|
return Void();
|
||||||
|
@ -1879,7 +1889,7 @@ ACTOR Future<Void> clusterRecruitFromConfiguration( ClusterControllerData* self,
|
||||||
throw; // goodbye, cluster controller
|
throw; // goodbye, cluster controller
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
wait( delay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY) );
|
wait( lowPriorityDelay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY) );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1895,7 +1905,7 @@ ACTOR Future<Void> clusterRecruitRemoteFromConfiguration( ClusterControllerData*
|
||||||
req.reply.send( rep );
|
req.reply.send( rep );
|
||||||
return Void();
|
return Void();
|
||||||
} catch (Error& e) {
|
} catch (Error& e) {
|
||||||
if (e.code() == error_code_no_more_servers && self->remoteStartTime.present() && now() - self->remoteStartTime.get() >= SERVER_KNOBS->WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY) {
|
if (e.code() == error_code_no_more_servers && self->goodRemoteRecruitmentTime.isReady()) {
|
||||||
self->outstandingRemoteRecruitmentRequests.push_back( req );
|
self->outstandingRemoteRecruitmentRequests.push_back( req );
|
||||||
TraceEvent(SevWarn, "RecruitRemoteFromConfigurationNotAvailable", self->id).error(e);
|
TraceEvent(SevWarn, "RecruitRemoteFromConfigurationNotAvailable", self->id).error(e);
|
||||||
return Void();
|
return Void();
|
||||||
|
@ -1907,7 +1917,7 @@ ACTOR Future<Void> clusterRecruitRemoteFromConfiguration( ClusterControllerData*
|
||||||
throw; // goodbye, cluster controller
|
throw; // goodbye, cluster controller
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
wait( delay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY) );
|
wait( lowPriorityDelay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY) );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2010,6 +2020,8 @@ void registerWorker( RegisterWorkerRequest req, ClusterControllerData *self ) {
|
||||||
|
|
||||||
if(info == self->id_worker.end()) {
|
if(info == self->id_worker.end()) {
|
||||||
TraceEvent("ClusterControllerActualWorkers", self->id).detail("WorkerId",w.id()).detail("ProcessId", w.locality.processId()).detail("ZoneId", w.locality.zoneId()).detail("DataHall", w.locality.dataHallId()).detail("PClass", req.processClass.toString()).detail("Workers", self->id_worker.size());
|
TraceEvent("ClusterControllerActualWorkers", self->id).detail("WorkerId",w.id()).detail("ProcessId", w.locality.processId()).detail("ZoneId", w.locality.zoneId()).detail("DataHall", w.locality.dataHallId()).detail("PClass", req.processClass.toString()).detail("Workers", self->id_worker.size());
|
||||||
|
self->goodRecruitmentTime = lowPriorityDelay(SERVER_KNOBS->WAIT_FOR_GOOD_RECRUITMENT_DELAY);
|
||||||
|
self->goodRemoteRecruitmentTime = lowPriorityDelay(SERVER_KNOBS->WAIT_FOR_GOOD_REMOTE_RECRUITMENT_DELAY);
|
||||||
} else {
|
} else {
|
||||||
TraceEvent("ClusterControllerWorkerAlreadyRegistered", self->id).suppressFor(1.0).detail("WorkerId",w.id()).detail("ProcessId", w.locality.processId()).detail("ZoneId", w.locality.zoneId()).detail("DataHall", w.locality.dataHallId()).detail("PClass", req.processClass.toString()).detail("Workers", self->id_worker.size());
|
TraceEvent("ClusterControllerWorkerAlreadyRegistered", self->id).suppressFor(1.0).detail("WorkerId",w.id()).detail("ProcessId", w.locality.processId()).detail("ZoneId", w.locality.zoneId()).detail("DataHall", w.locality.dataHallId()).detail("PClass", req.processClass.toString()).detail("Workers", self->id_worker.size());
|
||||||
}
|
}
|
||||||
|
@ -2674,7 +2686,7 @@ ACTOR Future<DataDistributorInterface> startDataDistributor( ClusterControllerDa
|
||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
wait( delay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY) );
|
wait( lowPriorityDelay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY) );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2748,7 +2760,7 @@ ACTOR Future<Void> startRatekeeper(ClusterControllerData *self) {
|
||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
wait( delay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY) );
|
wait( lowPriorityDelay(SERVER_KNOBS->ATTEMPT_RECRUITMENT_DELAY) );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@@ -401,7 +401,7 @@ struct LeaderRegisterCollection {
 if( !self->pStore->exists() )
 return Void();
 OnDemandStore &store = *self->pStore;
-Standalone<VectorRef<KeyValueRef>> forwardingInfo = wait( store->readRange( fwdKeys ) );
+Standalone<RangeResultRef> forwardingInfo = wait( store->readRange( fwdKeys ) );
 for( int i = 0; i < forwardingInfo.size(); i++ ) {
 LeaderInfo forwardInfo;
 forwardInfo.forward = true;
@@ -293,8 +293,8 @@ public:
 return minRatio;
 }

-virtual bool hasHealthyAvailableSpace() {
-return getMinAvailableSpaceRatio() > SERVER_KNOBS->MIN_FREE_SPACE_RATIO && getMinAvailableSpace() > SERVER_KNOBS->MIN_FREE_SPACE;
+virtual bool hasHealthyAvailableSpace(double minRatio, int64_t minAvailableSpace) {
+return (minRatio == 0 || getMinAvailableSpaceRatio() > minRatio) && (minAvailableSpace == std::numeric_limits<int64_t>::min() || getMinAvailableSpace() > minAvailableSpace);
 }

 virtual Future<Void> updateStorageMetrics() {
@@ -758,6 +758,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
 std::vector<Reference<IDataDistributionTeam>> randomTeams;
 const std::set<UID> completeSources(req.completeSources.begin(), req.completeSources.end());

+// Note: this block does not apply any filters from the request
 if( !req.wantsNewServers ) {
 for( int i = 0; i < req.completeSources.size(); i++ ) {
 if( !self->server_info.count( req.completeSources[i] ) ) {
@@ -773,10 +774,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
 break;
 }
 }
-if(found && teamList[j]->isHealthy() &&
-(!req.teamMustHaveShards || self->shardsAffectedByTeamFailure->getShardsFor(ShardsAffectedByTeamFailure::Team(teamList[j]->getServerIDs(), self->primary)).size() > 0) &&
-teamList[j]->getMinAvailableSpaceRatio() >= req.minAvailableSpaceRatio)
-{
+if(found && teamList[j]->isHealthy()) {
 req.reply.send( teamList[j] );
 return Void();
 }
@@ -788,9 +786,8 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
 ASSERT( !bestOption.present() );
 for( int i = 0; i < self->teams.size(); i++ ) {
 if (self->teams[i]->isHealthy() &&
-(!req.preferLowerUtilization || self->teams[i]->hasHealthyAvailableSpace()) &&
-(!req.teamMustHaveShards || self->shardsAffectedByTeamFailure->getShardsFor(ShardsAffectedByTeamFailure::Team(self->teams[i]->getServerIDs(), self->primary)) .size() > 0) &&
-self->teams[i]->getMinAvailableSpaceRatio() >= req.minAvailableSpaceRatio)
+self->teams[i]->hasHealthyAvailableSpace(req.minAvailableSpaceRatio, req.preferLowerUtilization ? SERVER_KNOBS->MIN_FREE_SPACE : std::numeric_limits<int64_t>::min()) &&
+(!req.teamMustHaveShards || self->shardsAffectedByTeamFailure->getShardsFor(ShardsAffectedByTeamFailure::Team(self->teams[i]->getServerIDs(), self->primary)).size() > 0))
 {
 int64_t loadBytes = self->teams[i]->getLoadBytes(true, req.inflightPenalty);
 if( !bestOption.present() || ( req.preferLowerUtilization && loadBytes < bestLoadBytes ) || ( !req.preferLowerUtilization && loadBytes > bestLoadBytes ) ) {
@@ -806,9 +803,8 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
 Reference<IDataDistributionTeam> dest = deterministicRandom()->randomChoice(self->teams);

 bool ok = dest->isHealthy() &&
-(!req.preferLowerUtilization || dest->hasHealthyAvailableSpace()) &&
-(!req.teamMustHaveShards || self->shardsAffectedByTeamFailure->getShardsFor(ShardsAffectedByTeamFailure::Team(dest->getServerIDs(), self->primary)).size() > 0) &&
-dest->getMinAvailableSpaceRatio() >= req.minAvailableSpaceRatio;
+dest->hasHealthyAvailableSpace(req.minAvailableSpaceRatio, req.preferLowerUtilization ? SERVER_KNOBS->MIN_FREE_SPACE : std::numeric_limits<int64_t>::min()) &&
+(!req.teamMustHaveShards || self->shardsAffectedByTeamFailure->getShardsFor(ShardsAffectedByTeamFailure::Team(dest->getServerIDs(), self->primary)).size() > 0);

 for(int i=0; ok && i<randomTeams.size(); i++) {
 if (randomTeams[i]->getServerIDs() == dest->getServerIDs()) {
@@ -834,6 +830,7 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {

 // Note: req.completeSources can be empty and all servers (and server teams) can be unhealthy.
 // We will get stuck at this! This only happens when a DC fails. No need to consider it right now.
+// Note: this block does not apply any filters from the request
 if(!bestOption.present() && self->zeroHealthyTeams->get()) {
 //Attempt to find the unhealthy source server team and return it
 for( int i = 0; i < req.completeSources.size(); i++ ) {
@@ -844,11 +841,6 @@ struct DDTeamCollection : ReferenceCounted<DDTeamCollection> {
 for( int j = 0; j < teamList.size(); j++ ) {
 bool found = true;
 auto serverIDs = teamList[j]->getServerIDs();
-if((req.teamMustHaveShards && self->shardsAffectedByTeamFailure->getShardsFor(ShardsAffectedByTeamFailure::Team(serverIDs, self->primary)).size() == 0) ||
-teamList[j]->getMinAvailableSpaceRatio() < req.minAvailableSpaceRatio)
-{
-continue;
-}
 for( int k = 0; k < teamList[j]->size(); k++ ) {
 if( !completeSources.count( serverIDs[k] ) ) {
 found = false;
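An aside on the new two-argument filter above: hasHealthyAvailableSpace(minRatio, minAvailableSpace) treats minRatio == 0 and minAvailableSpace == std::numeric_limits<int64_t>::min() as "disabled" sentinels, which is how requests that do not prefer lower utilization skip the absolute free-space check while still enforcing the requested ratio. A minimal stand-alone sketch of that predicate, with the team accessors replaced by plain parameters (an illustration, not the fdbserver code):

#include <cstdint>
#include <iostream>
#include <limits>

// teamMinRatio / teamMinSpace stand in for getMinAvailableSpaceRatio() and
// getMinAvailableSpace() on a team; the two filter arguments mirror the diff above.
bool hasHealthyAvailableSpace(double teamMinRatio, int64_t teamMinSpace,
                              double minRatio, int64_t minAvailableSpace) {
	// minRatio == 0 disables the ratio check; INT64_MIN disables the absolute-bytes check.
	return (minRatio == 0 || teamMinRatio > minRatio) &&
	       (minAvailableSpace == std::numeric_limits<int64_t>::min() || teamMinSpace > minAvailableSpace);
}

int main() {
	const int64_t noSpaceCheck = std::numeric_limits<int64_t>::min();
	// Hypothetical numbers: a request that prefers lower utilization passes a byte floor,
	// one that does not passes INT64_MIN so only the ratio is enforced.
	std::cout << hasHealthyAvailableSpace(0.30, 200e9, 0.05, 100e9) << "\n";       // 1
	std::cout << hasHealthyAvailableSpace(0.30, 50e9, 0.05, noSpaceCheck) << "\n"; // 1
	std::cout << hasHealthyAvailableSpace(0.30, 50e9, 0.05, 100e9) << "\n";        // 0
}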
@@ -47,7 +47,7 @@ struct IDataDistributionTeam {
 virtual int64_t getLoadBytes( bool includeInFlight = true, double inflightPenalty = 1.0 ) = 0;
 virtual int64_t getMinAvailableSpace( bool includeInFlight = true ) = 0;
 virtual double getMinAvailableSpaceRatio( bool includeInFlight = true ) = 0;
-virtual bool hasHealthyAvailableSpace() = 0;
+virtual bool hasHealthyAvailableSpace( double minRatio, int64_t minAvailableSpace ) = 0;
 virtual Future<Void> updateStorageMetrics() = 0;
 virtual void addref() = 0;
 virtual void delref() = 0;
@@ -186,9 +186,9 @@ public:
 return result;
 }

-virtual bool hasHealthyAvailableSpace() {
-return all([](Reference<IDataDistributionTeam> team) {
-return team->hasHealthyAvailableSpace();
+virtual bool hasHealthyAvailableSpace(double minRatio, int64_t minAvailableSpace) {
+return all([minRatio, minAvailableSpace](Reference<IDataDistributionTeam> team) {
+return team->hasHealthyAvailableSpace(minRatio, minAvailableSpace);
 });
 }

@@ -929,7 +929,7 @@ ACTOR Future<Void> dataDistributionRelocator( DDQueueData *self, RelocateData rd
 if(rd.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_UNHEALTHY || rd.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_2_LEFT) inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_UNHEALTHY;
 if(rd.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_1_LEFT || rd.healthPriority == SERVER_KNOBS->PRIORITY_TEAM_0_LEFT) inflightPenalty = SERVER_KNOBS->INFLIGHT_PENALTY_ONE_LEFT;

-auto req = GetTeamRequest(rd.wantsNewServers, rd.priority == SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM, true, false, 0.0, inflightPenalty);
+auto req = GetTeamRequest(rd.wantsNewServers, rd.priority == SERVER_KNOBS->PRIORITY_REBALANCE_UNDERUTILIZED_TEAM, true, false, SERVER_KNOBS->MIN_FREE_SPACE_RATIO, inflightPenalty);
 req.completeSources = rd.completeSources;
 Optional<Reference<IDataDistributionTeam>> bestTeam = wait(brokenPromiseToNever(self->teamCollections[tciIndex].getTeam.getReply(req)));
 // If a DC has no healthy team, we stop checking the other DCs until
@@ -49,7 +49,7 @@ public:

 // If rowLimit>=0, reads first rows sorted ascending, otherwise reads last rows sorted descending
 // The total size of the returned value (less the last entry) will be less than byteLimit
-virtual Future<Standalone<VectorRef<KeyValueRef>>> readRange( KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30 ) = 0;
+virtual Future<Standalone<RangeResultRef>> readRange( KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30 ) = 0;

 //Returns the amount of free and total space for this store, in bytes
 virtual StorageBytes getStorageBytes() = 0;
@@ -77,12 +77,12 @@ struct KeyValueStoreCompressTestData : IKeyValueStore {

 // If rowLimit>=0, reads first rows sorted ascending, otherwise reads last rows sorted descending
 // The total size of the returned value (less the last entry) will be less than byteLimit
-virtual Future<Standalone<VectorRef<KeyValueRef>>> readRange( KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30 ) {
+virtual Future<Standalone<RangeResultRef>> readRange( KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30 ) {
 return doReadRange(store, keys, rowLimit, byteLimit);
 }
-ACTOR Future<Standalone<VectorRef<KeyValueRef>>> doReadRange( IKeyValueStore* store, KeyRangeRef keys, int rowLimit, int byteLimit ) {
-Standalone<VectorRef<KeyValueRef>> _vs = wait( store->readRange(keys, rowLimit, byteLimit) );
-Standalone<VectorRef<KeyValueRef>> vs = _vs; // Get rid of implicit const& from wait statement
+ACTOR Future<Standalone<RangeResultRef>> doReadRange( IKeyValueStore* store, KeyRangeRef keys, int rowLimit, int byteLimit ) {
+Standalone<RangeResultRef> _vs = wait( store->readRange(keys, rowLimit, byteLimit) );
+Standalone<RangeResultRef> vs = _vs; // Get rid of implicit const& from wait statement
 Arena& a = vs.arena();
 for(int i=0; i<vs.size(); i++)
 vs[i].value = ValueRef( a, (ValueRef const&)unpack(vs[i].value) );
@@ -216,14 +216,18 @@ public:

 // If rowLimit>=0, reads first rows sorted ascending, otherwise reads last rows sorted descending
 // The total size of the returned value (less the last entry) will be less than byteLimit
-virtual Future<Standalone<VectorRef<KeyValueRef>>> readRange( KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30 ) {
+virtual Future<Standalone<RangeResultRef>> readRange( KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30 ) {
 if(recovering.isError()) throw recovering.getError();
 if (!recovering.isReady()) return waitAndReadRange(this, keys, rowLimit, byteLimit);

-Standalone<VectorRef<KeyValueRef>> result;
-if (rowLimit >= 0) {
+Standalone<RangeResultRef> result;
+if (rowLimit == 0) {
+return result;
+}
+
+if (rowLimit > 0) {
 auto it = data.lower_bound(keys.begin);
-while (it!=data.end() && it->key < keys.end && rowLimit && byteLimit>=0) {
+while (it!=data.end() && it->key < keys.end && rowLimit && byteLimit>0) {
 byteLimit -= sizeof(KeyValueRef) + it->key.size() + it->value.size();
 result.push_back_deep( result.arena(), KeyValueRef(it->key, it->value) );
 ++it;
@@ -232,13 +236,19 @@ public:
 } else {
 rowLimit = -rowLimit;
 auto it = data.previous( data.lower_bound(keys.end) );
-while (it!=data.end() && it->key >= keys.begin && rowLimit && byteLimit>=0) {
+while (it!=data.end() && it->key >= keys.begin && rowLimit && byteLimit>0) {
 byteLimit -= sizeof(KeyValueRef) + it->key.size() + it->value.size();
 result.push_back_deep( result.arena(), KeyValueRef(it->key, it->value) );
 it = data.previous(it);
 --rowLimit;
 }
 }

+result.more = rowLimit == 0 || byteLimit <= 0;
+if(result.more) {
+ASSERT(result.size() > 0);
+result.readThrough = result[result.size()-1].key;
+}
 return result;
 }

@@ -694,7 +704,7 @@ private:
 wait( self->recovering );
 return self->readValuePrefix(key, maxLength).get();
 }
-ACTOR static Future<Standalone<VectorRef<KeyValueRef>>> waitAndReadRange( KeyValueStoreMemory* self, KeyRange keys, int rowLimit, int byteLimit ) {
+ACTOR static Future<Standalone<RangeResultRef>> waitAndReadRange( KeyValueStoreMemory* self, KeyRange keys, int rowLimit, int byteLimit ) {
 wait( self->recovering );
 return self->readRange(keys, rowLimit, byteLimit).get();
 }
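With readRange now returning RangeResultRef, a truncated read reports more = true and sets readThrough to the last key it returned, so a caller can resume just past that key instead of guessing where the scan stopped. A rough, self-contained sketch of that paging pattern; the Page struct and readRange stub below are stand-ins for illustration, not FoundationDB APIs:

#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

// Stand-in for a RangeResultRef-style result: a batch of pairs plus
// more/readThrough, mirroring the fields added in the hunks above.
struct Page {
	std::vector<std::pair<std::string, std::string>> kv;
	bool more = false;
	std::string readThrough;
};

// Toy readRange over an in-memory map, truncating at rowLimit like the store does.
Page readRange(const std::map<std::string, std::string>& data,
               const std::string& begin, const std::string& end, int rowLimit) {
	Page p;
	for (auto it = data.lower_bound(begin); it != data.end() && it->first < end; ++it) {
		if (rowLimit-- == 0) { p.more = true; break; }
		p.kv.push_back(*it);
	}
	if (p.more && !p.kv.empty()) p.readThrough = p.kv.back().first;
	return p;
}

int main() {
	std::map<std::string, std::string> data = {{"a","1"},{"b","2"},{"c","3"},{"d","4"}};
	std::string begin = "a", end = "z";
	// Resume each page just past readThrough ("key" + '\0' is the next possible key).
	for (;;) {
		Page p = readRange(data, begin, end, 2);
		for (auto& kv : p.kv) std::cout << kv.first << "=" << kv.second << "\n";
		if (!p.more) break;
		begin = p.readThrough + '\0';
	}
}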
@@ -1076,21 +1076,26 @@ struct RawCursor {
 }
 return Optional<Value>();
 }
-Standalone<VectorRef<KeyValueRef>> getRange( KeyRangeRef keys, int rowLimit, int byteLimit ) {
-Standalone<VectorRef<KeyValueRef>> result;
+Standalone<RangeResultRef> getRange( KeyRangeRef keys, int rowLimit, int byteLimit ) {
+Standalone<RangeResultRef> result;
 int accumulatedBytes = 0;
 ASSERT( byteLimit > 0 );
+if(rowLimit == 0) {
+return result;
+}
+
 if(db.fragment_values) {
-if(rowLimit >= 0) {
+if(rowLimit > 0) {
 int r = moveTo(keys.begin);
 if (r < 0)
 moveNext();

 DefragmentingReader i(*this, result.arena(), true);
 Optional<KeyRef> nextKey = i.peek();
-while(nextKey.present() && nextKey.get() < keys.end && rowLimit-- && accumulatedBytes < byteLimit) {
+while(nextKey.present() && nextKey.get() < keys.end && rowLimit != 0 && accumulatedBytes < byteLimit) {
 Optional<KeyValueRef> kv = i.getNext();
 result.push_back(result.arena(), kv.get());
+--rowLimit;
 accumulatedBytes += sizeof(KeyValueRef) + kv.get().expectedSize();
 nextKey = i.peek();
 }
@@ -1101,37 +1106,45 @@ struct RawCursor {
 movePrevious();
 DefragmentingReader i(*this, result.arena(), false);
 Optional<KeyRef> nextKey = i.peek();
-while(nextKey.present() && nextKey.get() >= keys.begin && rowLimit++ && accumulatedBytes < byteLimit) {
+while(nextKey.present() && nextKey.get() >= keys.begin && rowLimit != 0 && accumulatedBytes < byteLimit) {
 Optional<KeyValueRef> kv = i.getNext();
 result.push_back(result.arena(), kv.get());
+++rowLimit;
 accumulatedBytes += sizeof(KeyValueRef) + kv.get().expectedSize();
 nextKey = i.peek();
 }
 }
 }
 else {
-if (rowLimit >= 0) {
+if (rowLimit > 0) {
 int r = moveTo( keys.begin );
 if (r < 0) moveNext();
-while (this->valid && rowLimit-- && accumulatedBytes < byteLimit) {
+while (this->valid && rowLimit != 0 && accumulatedBytes < byteLimit) {
 KeyValueRef kv = decodeKV( getEncodedRow( result.arena() ) );
-accumulatedBytes += sizeof(KeyValueRef) + kv.expectedSize();
 if (kv.key >= keys.end) break;
+--rowLimit;
+accumulatedBytes += sizeof(KeyValueRef) + kv.expectedSize();
 result.push_back( result.arena(), kv );
 moveNext();
 }
 } else {
 int r = moveTo( keys.end );
 if (r >= 0) movePrevious();
-while (this->valid && rowLimit++ && accumulatedBytes < byteLimit) {
+while (this->valid && rowLimit != 0 && accumulatedBytes < byteLimit) {
 KeyValueRef kv = decodeKV( getEncodedRow( result.arena() ) );
-accumulatedBytes += sizeof(KeyValueRef) + kv.expectedSize();
 if (kv.key < keys.begin) break;
+++rowLimit;
+accumulatedBytes += sizeof(KeyValueRef) + kv.expectedSize();
 result.push_back( result.arena(), kv );
 movePrevious();
 }
 }
 }
+result.more = rowLimit == 0 || accumulatedBytes >= byteLimit;
+if(result.more) {
+ASSERT(result.size() > 0);
+result.readThrough = result[result.size()-1].key;
+}
 return result;
 }

@@ -1451,7 +1464,7 @@ public:

 virtual Future<Optional<Value>> readValue( KeyRef key, Optional<UID> debugID );
 virtual Future<Optional<Value>> readValuePrefix( KeyRef key, int maxLength, Optional<UID> debugID );
-virtual Future<Standalone<VectorRef<KeyValueRef>>> readRange( KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30 );
+virtual Future<Standalone<RangeResultRef>> readRange( KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30 );

 KeyValueStoreSQLite(std::string const& filename, UID logID, KeyValueStoreType type, bool checkChecksums, bool checkIntegrity);
 ~KeyValueStoreSQLite();
@@ -1550,7 +1563,7 @@ private:
 struct ReadRangeAction : TypedAction<Reader, ReadRangeAction>, FastAllocated<ReadRangeAction> {
 KeyRange keys;
 int rowLimit, byteLimit;
-ThreadReturnPromise<Standalone<VectorRef<KeyValueRef>>> result;
+ThreadReturnPromise<Standalone<RangeResultRef>> result;
 ReadRangeAction(KeyRange keys, int rowLimit, int byteLimit) : keys(keys), rowLimit(rowLimit), byteLimit(byteLimit) {}
 virtual double getTimeEstimate() { return SERVER_KNOBS->READ_RANGE_TIME_ESTIMATE; }
 };
@@ -2000,7 +2013,7 @@ Future<Optional<Value>> KeyValueStoreSQLite::readValuePrefix( KeyRef key, int ma
 readThreads->post(p);
 return f;
 }
-Future<Standalone<VectorRef<KeyValueRef>>> KeyValueStoreSQLite::readRange( KeyRangeRef keys, int rowLimit, int byteLimit ) {
+Future<Standalone<RangeResultRef>> KeyValueStoreSQLite::readRange( KeyRangeRef keys, int rowLimit, int byteLimit ) {
 ++readsRequested;
 auto p = new Reader::ReadRangeAction(keys, rowLimit, byteLimit);
 auto f = p->result.getFuture();
@@ -79,7 +79,6 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
 init( DISK_QUEUE_FILE_EXTENSION_BYTES, 10<<20 ); // BUGGIFYd per file within the DiskQueue
 init( DISK_QUEUE_FILE_SHRINK_BYTES, 100<<20 ); // BUGGIFYd per file within the DiskQueue
 init( DISK_QUEUE_MAX_TRUNCATE_BYTES, 2<<30 ); if ( randomize && BUGGIFY ) DISK_QUEUE_MAX_TRUNCATE_BYTES = 0;
-init( TLOG_DEGRADED_DELAY_COUNT, 5 );
 init( TLOG_DEGRADED_DURATION, 5.0 );
 init( TLOG_IGNORE_POP_AUTO_ENABLE_DELAY, 300.0 );
 init( TXS_POPPED_MAX_DELAY, 1.0 ); if ( randomize && BUGGIFY ) TXS_POPPED_MAX_DELAY = deterministicRandom()->random01();
@@ -166,7 +165,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
 If this value is too small relative to SHARD_MIN_BYTES_PER_KSEC immediate merging work will be generated.
 */

-init( STORAGE_METRIC_TIMEOUT, 600.0 ); if( randomize && BUGGIFY ) STORAGE_METRIC_TIMEOUT = deterministicRandom()->coinflip() ? 10.0 : 60.0;
+init( STORAGE_METRIC_TIMEOUT, isSimulated ? 60.0 : 600.0 ); if( randomize && BUGGIFY ) STORAGE_METRIC_TIMEOUT = deterministicRandom()->coinflip() ? 10.0 : 30.0;
 init( METRIC_DELAY, 0.1 ); if( randomize && BUGGIFY ) METRIC_DELAY = 1.0;
 init( ALL_DATA_REMOVED_DELAY, 1.0 );
 init( INITIAL_FAILURE_REACTION_DELAY, 30.0 ); if( randomize && BUGGIFY ) INITIAL_FAILURE_REACTION_DELAY = 0.0;
@@ -181,8 +180,8 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
 init( DATA_DISTRIBUTION_LOGGING_INTERVAL, 5.0 );
 init( DD_ENABLED_CHECK_DELAY, 1.0 );
 init( DD_STALL_CHECK_DELAY, 0.4 ); //Must be larger than 2*MAX_BUGGIFIED_DELAY
-init( DD_LOW_BANDWIDTH_DELAY, isSimulated ? 90.0 : 240.0 ); if( randomize && BUGGIFY ) DD_LOW_BANDWIDTH_DELAY = 0; //Because of delayJitter, this should be less than 0.9 * DD_MERGE_COALESCE_DELAY
-init( DD_MERGE_COALESCE_DELAY, isSimulated ? 120.0 : 300.0 ); if( randomize && BUGGIFY ) DD_MERGE_COALESCE_DELAY = 0.001;
+init( DD_LOW_BANDWIDTH_DELAY, isSimulated ? 15.0 : 240.0 ); if( randomize && BUGGIFY ) DD_LOW_BANDWIDTH_DELAY = 0; //Because of delayJitter, this should be less than 0.9 * DD_MERGE_COALESCE_DELAY
+init( DD_MERGE_COALESCE_DELAY, isSimulated ? 30.0 : 300.0 ); if( randomize && BUGGIFY ) DD_MERGE_COALESCE_DELAY = 0.001;
 init( STORAGE_METRICS_POLLING_DELAY, 2.0 ); if( randomize && BUGGIFY ) STORAGE_METRICS_POLLING_DELAY = 15.0;
 init( STORAGE_METRICS_RANDOM_DELAY, 0.2 );
 init( FREE_SPACE_RATIO_CUTOFF, 0.1 );
@@ -319,6 +318,9 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
 init( REQUIRED_MIN_RECOVERY_DURATION, 0.080 ); if( shortRecoveryDuration ) REQUIRED_MIN_RECOVERY_DURATION = 0.01;
 init( ALWAYS_CAUSAL_READ_RISKY, false );
 init( MAX_COMMIT_UPDATES, 2000 ); if( randomize && BUGGIFY ) MAX_COMMIT_UPDATES = 1;
+init( MIN_PROXY_COMPUTE, 0.001 );
+init( PROXY_COMPUTE_BUCKETS, 5000 );
+init( PROXY_COMPUTE_GROWTH_RATE, 0.01 );

 // Master Server
 // masterCommitter() in the master server will allow lower priority tasks (e.g. DataDistibution)
@@ -476,7 +478,7 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs, bool isSimula
 init( BEHIND_CHECK_DELAY, 2.0 );
 init( BEHIND_CHECK_COUNT, 2 );
 init( BEHIND_CHECK_VERSIONS, 5 * VERSIONS_PER_SECOND );
-init( WAIT_METRICS_WRONG_SHARD_CHANCE, 0.1 );
+init( WAIT_METRICS_WRONG_SHARD_CHANCE, isSimulated ? 1.0 : 0.1 );

 //Wait Failure
 init( MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS, 250 ); if( randomize && BUGGIFY ) MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS = 2;
@@ -82,7 +82,6 @@ public:
 int64_t DISK_QUEUE_FILE_EXTENSION_BYTES; // When we grow the disk queue, by how many bytes should it grow?
 int64_t DISK_QUEUE_FILE_SHRINK_BYTES; // When we shrink the disk queue, by how many bytes should it shrink?
 int DISK_QUEUE_MAX_TRUNCATE_BYTES; // A truncate larger than this will cause the file to be replaced instead.
-int TLOG_DEGRADED_DELAY_COUNT;
 double TLOG_DEGRADED_DURATION;
 double TXS_POPPED_MAX_DELAY;

@@ -264,6 +263,9 @@ public:
 double REQUIRED_MIN_RECOVERY_DURATION;
 bool ALWAYS_CAUSAL_READ_RISKY;
 int MAX_COMMIT_UPDATES;
+double MIN_PROXY_COMPUTE;
+int PROXY_COMPUTE_BUCKETS;
+double PROXY_COMPUTE_GROWTH_RATE;

 // Master Server
 double COMMIT_SLEEP_TIME;
@@ -1054,7 +1054,12 @@ ACTOR Future<Void> bufferedGetMore( ILogSystem::BufferedCursor* self, TaskPriori
 loop {
 wait( allLoaders || delay(SERVER_KNOBS->DESIRED_GET_MORE_DELAY, taskID) );
 minVersion = self->end;
-for(auto cursor : self->cursors) {
+for(int i = 0; i < self->cursors.size(); i++) {
+auto cursor = self->cursors[i];
+while(cursor->hasMessage()) {
+self->cursorMessages[i].push_back(ILogSystem::BufferedCursor::BufferedMessage(cursor->arena(), (!self->withTags || self->collectTags) ? cursor->getMessage() : cursor->getMessageWithTags(), !self->withTags ? VectorRef<Tag>() : cursor->getTags(), cursor->version()));
+cursor->nextMessage();
+}
 minVersion = std::min(minVersion, cursor->version().version);
 }
 if(minVersion > self->messageVersion.version) {
@@ -237,6 +237,8 @@ struct ProxyCommitData {
 int updateCommitRequests = 0;
 NotifiedDouble lastCommitTime;

+vector<double> commitComputePerOperation;
+
 //The tag related to a storage server rarely change, so we keep a vector of tags for each key range to be slightly more CPU efficient.
 //When a tag related to a storage server does change, we empty out all of these vectors to signify they must be repopulated.
 //We do not repopulate them immediately to avoid a slow task.
@@ -293,7 +295,9 @@ struct ProxyCommitData {
 localCommitBatchesStarted(0), locked(false), commitBatchInterval(SERVER_KNOBS->COMMIT_TRANSACTION_BATCH_INTERVAL_MIN),
 firstProxy(firstProxy), cx(openDBOnServer(db, TaskPriority::DefaultEndpoint, true, true)), db(db),
 singleKeyMutationEvent(LiteralStringRef("SingleKeyMutation")), commitBatchesMemBytesCount(0), lastTxsPop(0), lastStartCommit(0), lastCommitLatency(SERVER_KNOBS->REQUIRED_MIN_RECOVERY_DURATION), lastCommitTime(0)
-{}
+{
+commitComputePerOperation.resize(SERVER_KNOBS->PROXY_COMPUTE_BUCKETS,0.0);
+}
 };

 struct ResolutionRequestBuilder {
@@ -475,6 +479,13 @@ bool isWhitelisted(const vector<Standalone<StringRef>>& binPathVec, StringRef bi
 return std::find(binPathVec.begin(), binPathVec.end(), binPath) != binPathVec.end();
 }

+ACTOR Future<Void> releaseResolvingAfter(ProxyCommitData* self, Future<Void> releaseDelay, int64_t localBatchNumber) {
+wait(releaseDelay);
+ASSERT(self->latestLocalCommitBatchResolving.get() == localBatchNumber-1);
+self->latestLocalCommitBatchResolving.set(localBatchNumber);
+return Void();
+}
+
 ACTOR Future<Void> commitBatch(
 ProxyCommitData* self,
 vector<CommitTransactionRequest> trs,
@@ -486,6 +497,14 @@ ACTOR Future<Void> commitBatch(
 state Optional<UID> debugID;
 state bool forceRecovery = false;
 state BinaryWriter valueWriter(Unversioned());

+state int batchOperations = 0;
+int64_t batchBytes = 0;
+for (int t = 0; t<trs.size(); t++) {
+batchOperations += trs[t].transaction.mutations.size();
+batchBytes += trs[t].transaction.mutations.expectedSize();
+}
+state int latencyBucket = batchOperations == 0 ? 0 : std::min<int>(SERVER_KNOBS->PROXY_COMPUTE_BUCKETS-1,SERVER_KNOBS->PROXY_COMPUTE_BUCKETS*batchBytes/(batchOperations*(CLIENT_KNOBS->VALUE_SIZE_LIMIT+CLIENT_KNOBS->KEY_SIZE_LIMIT)));
+
 ASSERT(SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS <= SERVER_KNOBS->MAX_VERSIONS_IN_FLIGHT); // since we are using just the former to limit the number of versions actually in flight!

@@ -515,7 +534,7 @@ ACTOR Future<Void> commitBatch(
 /////// Phase 1: Pre-resolution processing (CPU bound except waiting for a version # which is separately pipelined and *should* be available by now (unless empty commit); ordered; currently atomic but could yield)
 TEST(self->latestLocalCommitBatchResolving.get() < localBatchNumber-1); // Queuing pre-resolution commit processing
 wait(self->latestLocalCommitBatchResolving.whenAtLeast(localBatchNumber-1));
-wait(yield(TaskPriority::ProxyCommitYield1));
+state Future<Void> releaseDelay = delay(batchOperations*self->commitComputePerOperation[latencyBucket], TaskPriority::ProxyMasterVersionReply);

 if (debugID.present())
 g_traceBatch.addEvent("CommitDebug", debugID.get().first(), "MasterProxyServer.commitBatch.GettingCommitVersion");
@@ -566,9 +585,7 @@ ACTOR Future<Void> commitBatch(
 }

 state vector<vector<int>> transactionResolverMap = std::move( requests.transactionResolverMap );
-ASSERT(self->latestLocalCommitBatchResolving.get() == localBatchNumber-1);
-self->latestLocalCommitBatchResolving.set(localBatchNumber);
+state Future<Void> releaseFuture = releaseResolvingAfter(self, releaseDelay, localBatchNumber);

 /////// Phase 2: Resolution (waiting on the network; pipelined)
 state vector<ResolveTransactionBatchReply> resolution = wait( getAll(replies) );
@@ -579,8 +596,10 @@ ACTOR Future<Void> commitBatch(
 ////// Phase 3: Post-resolution processing (CPU bound except for very rare situations; ordered; currently atomic but doesn't need to be)
 TEST(self->latestLocalCommitBatchLogging.get() < localBatchNumber-1); // Queuing post-resolution commit processing
 wait(self->latestLocalCommitBatchLogging.whenAtLeast(localBatchNumber-1));
-wait(yield(TaskPriority::ProxyCommitYield2));
+wait(yield(TaskPriority::ProxyCommitYield1));

+state double computeStart = g_network->timer();
+state double computeDuration = 0;
 self->stats.txnCommitResolved += trs.size();

 if (debugID.present())
@@ -738,7 +757,11 @@ ACTOR Future<Void> commitBatch(
 for (; mutationNum < pMutations->size(); mutationNum++) {
 if(yieldBytes > SERVER_KNOBS->DESIRED_TOTAL_BYTES) {
 yieldBytes = 0;
-wait(yield(TaskPriority::ProxyCommitYield2));
+if(g_network->check_yield(TaskPriority::ProxyCommitYield1)) {
+computeDuration += g_network->timer() - computeStart;
+wait(delay(0, TaskPriority::ProxyCommitYield1));
+computeStart = g_network->timer();
+}
 }

 auto& m = (*pMutations)[mutationNum];
@@ -844,7 +867,11 @@ ACTOR Future<Void> commitBatch(
 while(blobIter) {
 if(yieldBytes > SERVER_KNOBS->DESIRED_TOTAL_BYTES) {
 yieldBytes = 0;
-wait(yield(TaskPriority::ProxyCommitYield2));
+if(g_network->check_yield(TaskPriority::ProxyCommitYield1)) {
+computeDuration += g_network->timer() - computeStart;
+wait(delay(0, TaskPriority::ProxyCommitYield1));
+computeStart = g_network->timer();
+}
 }
 valueWriter.serializeBytes(blobIter->data);
 yieldBytes += blobIter->data.size();
@@ -906,29 +933,33 @@ ACTOR Future<Void> commitBatch(

 // Storage servers mustn't make durable versions which are not fully committed (because then they are impossible to roll back)
 // We prevent this by limiting the number of versions which are semi-committed but not fully committed to be less than the MVCC window
-while (self->committedVersion.get() < commitVersion - SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS) {
-// This should be *extremely* rare in the real world, but knob buggification should make it happen in simulation
-TEST(true); // Semi-committed pipeline limited by MVCC window
-//TraceEvent("ProxyWaitingForCommitted", self->dbgid).detail("CommittedVersion", self->committedVersion.get()).detail("NeedToCommit", commitVersion);
-choose{
-when(wait(self->committedVersion.whenAtLeast(commitVersion - SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS))) {
-wait(yield());
-break;
-}
-when(GetReadVersionReply v = wait(self->getConsistentReadVersion.getReply(GetReadVersionRequest(0, GetReadVersionRequest::PRIORITY_SYSTEM_IMMEDIATE | GetReadVersionRequest::FLAG_CAUSAL_READ_RISKY)))) {
-if(v.version > self->committedVersion.get()) {
-self->locked = v.locked;
-self->metadataVersion = v.metadataVersion;
-self->committedVersion.set(v.version);
+if(self->committedVersion.get() < commitVersion - SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS) {
+computeDuration += g_network->timer() - computeStart;
+while (self->committedVersion.get() < commitVersion - SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS) {
+// This should be *extremely* rare in the real world, but knob buggification should make it happen in simulation
+TEST(true); // Semi-committed pipeline limited by MVCC window
+//TraceEvent("ProxyWaitingForCommitted", self->dbgid).detail("CommittedVersion", self->committedVersion.get()).detail("NeedToCommit", commitVersion);
+choose{
+when(wait(self->committedVersion.whenAtLeast(commitVersion - SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS))) {
+wait(yield());
+break;
+}
+when(GetReadVersionReply v = wait(self->getConsistentReadVersion.getReply(GetReadVersionRequest(0, GetReadVersionRequest::PRIORITY_SYSTEM_IMMEDIATE | GetReadVersionRequest::FLAG_CAUSAL_READ_RISKY)))) {
+if(v.version > self->committedVersion.get()) {
+self->locked = v.locked;
+self->metadataVersion = v.metadataVersion;
+self->committedVersion.set(v.version);
+}
+
+if (self->committedVersion.get() < commitVersion - SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS)
+wait(delay(SERVER_KNOBS->PROXY_SPIN_DELAY));
 }

-if (self->committedVersion.get() < commitVersion - SERVER_KNOBS->MAX_READ_TRANSACTION_LIFE_VERSIONS)
-wait(delay(SERVER_KNOBS->PROXY_SPIN_DELAY));
 }
 }
+computeStart = g_network->timer();
 }

-state LogSystemDiskQueueAdapter::CommitMessage msg = wait(storeCommits.back().first); // Should just be doing yields
+state LogSystemDiskQueueAdapter::CommitMessage msg = storeCommits.back().first.get();

 if (debugID.present())
 g_traceBatch.addEvent("CommitDebug", debugID.get().first(), "MasterProxyServer.commitBatch.AfterStoreCommits");
@@ -961,6 +992,16 @@ ACTOR Future<Void> commitBatch(
 self->latestLocalCommitBatchLogging.set(localBatchNumber);
 }

+computeDuration += g_network->timer() - computeStart;
+if(computeDuration > SERVER_KNOBS->MIN_PROXY_COMPUTE && batchOperations > 0) {
+double computePerOperation = computeDuration/batchOperations;
+if(computePerOperation <= self->commitComputePerOperation[latencyBucket] || self->commitComputePerOperation[latencyBucket] == 0.0) {
+self->commitComputePerOperation[latencyBucket] = computePerOperation;
+} else {
+self->commitComputePerOperation[latencyBucket] = SERVER_KNOBS->PROXY_COMPUTE_GROWTH_RATE*computePerOperation + ((1.0-SERVER_KNOBS->PROXY_COMPUTE_GROWTH_RATE)*self->commitComputePerOperation[latencyBucket]);
+}
+}
+
 /////// Phase 4: Logging (network bound; pipelined up to MAX_READ_TRANSACTION_LIFE_VERSIONS (limited by loop above))

 try {
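The hunk above keeps a per-bucket estimate of commit CPU cost per operation: when a batch turns out cheaper than the current estimate the estimate is replaced outright, otherwise it only grows by PROXY_COMPUTE_GROWTH_RATE, so the proxy's release delay adapts quickly downward and cautiously upward. A stand-alone sketch of that update rule, using the knob values from the Knobs.cpp hunk above as plain constants (an illustration, not the proxy code itself):

#include <iostream>

// estimate tracks seconds of proxy compute per operation for one latency bucket.
const double MIN_PROXY_COMPUTE = 0.001;
const double PROXY_COMPUTE_GROWTH_RATE = 0.01;

void updateEstimate(double& estimate, double computeDuration, int batchOperations) {
	if (computeDuration <= MIN_PROXY_COMPUTE || batchOperations == 0) return;
	double computePerOperation = computeDuration / batchOperations;
	if (computePerOperation <= estimate || estimate == 0.0) {
		// Cheaper than expected (or first sample): adopt the new value directly.
		estimate = computePerOperation;
	} else {
		// More expensive: blend in slowly so one slow batch cannot inflate the delay.
		estimate = PROXY_COMPUTE_GROWTH_RATE * computePerOperation + (1.0 - PROXY_COMPUTE_GROWTH_RATE) * estimate;
	}
}

int main() {
	double estimate = 0.0;
	updateEstimate(estimate, 0.010, 100);  // 100 us/op becomes the estimate
	updateEstimate(estimate, 0.100, 100);  // a slow batch only nudges it upward
	std::cout << estimate << "\n";         // ~0.000109
}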
|
@ -978,7 +1019,7 @@ ACTOR Future<Void> commitBatch(
|
||||||
}
|
}
|
||||||
self->lastCommitLatency = now()-commitStartTime;
|
self->lastCommitLatency = now()-commitStartTime;
|
||||||
self->lastCommitTime = std::max(self->lastCommitTime.get(), commitStartTime);
|
self->lastCommitTime = std::max(self->lastCommitTime.get(), commitStartTime);
|
||||||
wait(yield(TaskPriority::ProxyCommitYield3));
|
wait(yield(TaskPriority::ProxyCommitYield2));
|
||||||
|
|
||||||
if( self->popRemoteTxs && msg.popTo > ( self->txsPopVersions.size() ? self->txsPopVersions.back().second : self->lastTxsPop ) ) {
|
if( self->popRemoteTxs && msg.popTo > ( self->txsPopVersions.size() ? self->txsPopVersions.back().second : self->lastTxsPop ) ) {
|
||||||
if(self->txsPopVersions.size() >= SERVER_KNOBS->MAX_TXS_POP_VERSION_HISTORY) {
|
if(self->txsPopVersions.size() >= SERVER_KNOBS->MAX_TXS_POP_VERSION_HISTORY) {
|
||||||
|
@ -1017,7 +1058,7 @@ ACTOR Future<Void> commitBatch(
|
||||||
}
|
}
|
||||||
|
|
||||||
// Send replies to clients
|
// Send replies to clients
|
||||||
double endTime = timer();
|
double endTime = g_network->timer();
|
||||||
for (int t = 0; t < trs.size(); t++) {
|
for (int t = 0; t < trs.size(); t++) {
|
||||||
if (committed[t] == ConflictBatch::TransactionCommitted && (!locked || trs[t].isLockAware())) {
|
if (committed[t] == ConflictBatch::TransactionCommitted && (!locked || trs[t].isLockAware())) {
|
||||||
ASSERT_WE_THINK(commitVersion != invalidVersion);
|
ASSERT_WE_THINK(commitVersion != invalidVersion);
|
||||||
|
@ -1068,6 +1109,7 @@ ACTOR Future<Void> commitBatch(
|
||||||
|
|
||||||
self->commitBatchesMemBytesCount -= currentBatchMemBytesCount;
|
self->commitBatchesMemBytesCount -= currentBatchMemBytesCount;
|
||||||
ASSERT_ABORT(self->commitBatchesMemBytesCount >= 0);
|
ASSERT_ABORT(self->commitBatchesMemBytesCount >= 0);
|
||||||
|
wait(releaseFuture);
|
||||||
return Void();
|
return Void();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1149,7 +1191,7 @@ struct TransactionRateInfo {
|
||||||
|
|
||||||
ACTOR Future<Void> sendGrvReplies(Future<GetReadVersionReply> replyFuture, std::vector<GetReadVersionRequest> requests, ProxyStats *stats) {
|
ACTOR Future<Void> sendGrvReplies(Future<GetReadVersionReply> replyFuture, std::vector<GetReadVersionRequest> requests, ProxyStats *stats) {
|
||||||
GetReadVersionReply reply = wait(replyFuture);
|
GetReadVersionReply reply = wait(replyFuture);
|
||||||
double end = timer();
|
double end = g_network->timer();
|
||||||
for(GetReadVersionRequest const& request : requests) {
|
for(GetReadVersionRequest const& request : requests) {
|
||||||
if(request.priority() >= GetReadVersionRequest::PRIORITY_DEFAULT) {
|
if(request.priority() >= GetReadVersionRequest::PRIORITY_DEFAULT) {
|
||||||
stats->grvLatencyBands.addMeasurement(end - request.requestTime());
|
stats->grvLatencyBands.addMeasurement(end - request.requestTime());
|
||||||
|
@@ -1345,7 +1387,7 @@ ACTOR static Future<Void> rejoinServer( MasterProxyInterface proxy, ProxyCommitD
-			Standalone<VectorRef<KeyValueRef>> history = commitData->txnStateStore->readRange(serverTagHistoryRangeFor(req.id)).get();
+			Standalone<RangeResultRef> history = commitData->txnStateStore->readRange(serverTagHistoryRangeFor(req.id)).get();

@@ -1696,7 +1738,7 @@ ACTOR Future<Void> masterProxyServerCore(
	state KeyRange txnKeys = allKeys;
	loop {
		wait(yield());
-		Standalone<VectorRef<KeyValueRef>> data = commitData.txnStateStore->readRange(txnKeys, SERVER_KNOBS->BUGGIFIED_ROW_LIMIT, SERVER_KNOBS->APPLY_MUTATION_BYTES).get();
+		Standalone<RangeResultRef> data = commitData.txnStateStore->readRange(txnKeys, SERVER_KNOBS->BUGGIFIED_ROW_LIMIT, SERVER_KNOBS->APPLY_MUTATION_BYTES).get();
		if(!data.size()) break;
		((KeyRangeRef&)txnKeys) = KeyRangeRef( keyAfter(data.back().key, txnKeys.arena()), txnKeys.end );
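
masterProxyServerCore walks the whole txnStateStore in bounded chunks: each readRange is capped by row and byte knobs, the range is advanced past the last returned key with keyAfter, and the loop stops when a read comes back empty. A standalone sketch of the same pattern over an ordinary ordered map — the store, limits, and key names below are illustrative, not FDB types:

	// Standalone sketch (not FDB code) of the chunked range scan used above:
	// read a bounded batch, advance the range just past the last key returned,
	// stop when a batch comes back empty.
	#include <iostream>
	#include <map>
	#include <string>
	#include <utility>
	#include <vector>

	using Store = std::map<std::string, std::string>;

	std::vector<std::pair<std::string, std::string>>
	readRange(const Store& store, const std::string& begin, const std::string& end, size_t rowLimit) {
		std::vector<std::pair<std::string, std::string>> out;
		for (auto it = store.lower_bound(begin); it != store.end() && it->first < end && out.size() < rowLimit; ++it)
			out.push_back(*it);
		return out;
	}

	int main() {
		Store store = {{"a", "1"}, {"b", "2"}, {"c", "3"}, {"d", "4"}, {"e", "5"}};
		std::string begin = "a", end = "\xff";            // scan everything
		for (;;) {
			auto batch = readRange(store, begin, end, 2); // bounded batch, like the BUGGIFIED_ROW_LIMIT cap
			if (batch.empty()) break;
			for (auto& kv : batch) std::cout << kv.first << "=" << kv.second << "\n";
			begin = batch.back().first + '\0';            // keyAfter: smallest key strictly greater than the last one read
		}
	}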
@@ -270,6 +270,7 @@ namespace oldTLog_4_6 {
	UID dbgid;
+	UID workerID;

@@ -303,8 +304,8 @@ namespace oldTLog_4_6 {
-	TLogData(UID dbgid, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference<AsyncVar<ServerDBInfo>> const& dbInfo)
-		: dbgid(dbgid), instanceID(deterministicRandom()->randomUniqueID().first()),
+	TLogData(UID dbgid, UID workerID, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference<AsyncVar<ServerDBInfo>> const& dbInfo)
+		: dbgid(dbgid), workerID(workerID), instanceID(deterministicRandom()->randomUniqueID().first()),

@@ -412,7 +413,7 @@ namespace oldTLog_4_6 {
-		startRole(Role::TRANSACTION_LOG,interf.id(), UID());
+		startRole(Role::TRANSACTION_LOG, interf.id(), tLogData->workerID, {{"SharedTLog", tLogData->dbgid.shortString()}}, "Restored");
@@ -954,7 +955,7 @@ namespace oldTLog_4_6 {
	peekMessagesFromMemory( logData, req, messages2, endVersion );

-	Standalone<VectorRef<KeyValueRef>> kvs = wait(
+	Standalone<RangeResultRef> kvs = wait(
		self->persistentData->readRange(KeyRangeRef(
			persistTagMessagesKey(logData->logId, oldTag, req.begin),
			persistTagMessagesKey(logData->logId, oldTag, logData->persistentDataDurableVersion + 1)), SERVER_KNOBS->DESIRED_TOTAL_BYTES, SERVER_KNOBS->DESIRED_TOTAL_BYTES));

@@ -1120,7 +1121,7 @@ namespace oldTLog_4_6 {
	// The TLogRejoinRequest is needed to establish communications with a new master, which doesn't have our TLogInterface
	TLogRejoinRequest req;
	req.myInterface = tli;
-	TraceEvent("TLogRejoining", self->dbgid).detail("Master", self->dbInfo->get().master.id());
+	TraceEvent("TLogRejoining", tli.id()).detail("Master", self->dbInfo->get().master.id());
@@ -1268,8 +1269,8 @@ namespace oldTLog_4_6 {
	state Future<Optional<Value>> fFormat = storage->readValue(persistFormat.key);
-	state Future<Standalone<VectorRef<KeyValueRef>>> fVers = storage->readRange(persistCurrentVersionKeys);
-	state Future<Standalone<VectorRef<KeyValueRef>>> fRecoverCounts = storage->readRange(persistRecoveryCountKeys);
+	state Future<Standalone<RangeResultRef>> fVers = storage->readRange(persistCurrentVersionKeys);
+	state Future<Standalone<RangeResultRef>> fRecoverCounts = storage->readRange(persistRecoveryCountKeys);

@@ -1282,7 +1283,7 @@ namespace oldTLog_4_6 {
	if (!fFormat.get().present()) {
-		Standalone<VectorRef<KeyValueRef>> v = wait( self->persistentData->readRange( KeyRangeRef(StringRef(), LiteralStringRef("\xff")), 1 ) );
+		Standalone<RangeResultRef> v = wait( self->persistentData->readRange( KeyRangeRef(StringRef(), LiteralStringRef("\xff")), 1 ) );
		if (!v.size()) {
			TEST(true); // The DB is completely empty, so it was never initialized. Delete it.
			throw worker_removed();

@@ -1335,7 +1336,7 @@ namespace oldTLog_4_6 {
-		Standalone<VectorRef<KeyValueRef>> data = wait( self->persistentData->readRange( tagKeys, BUGGIFY ? 3 : 1<<30, 1<<20 ) );
+		Standalone<RangeResultRef> data = wait( self->persistentData->readRange( tagKeys, BUGGIFY ? 3 : 1<<30, 1<<20 ) );
@@ -1421,9 +1422,9 @@ namespace oldTLog_4_6 {
-	ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, UID tlogId )
+	ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, UID tlogId, UID workerID )
	{
-		state TLogData self( tlogId, persistentData, persistentQueue, db );
+		state TLogData self( tlogId, workerID, persistentData, persistentQueue, db );
		state Future<Void> error = actorCollection( self.sharedActors.getFuture() );

		TraceEvent("SharedTlog", tlogId);
@@ -245,6 +245,7 @@ struct TLogData : NonCopyable {
	UID dbgid;
+	UID workerID;

@@ -286,8 +287,8 @@ struct TLogData : NonCopyable {
-	TLogData(UID dbgid, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference<AsyncVar<ServerDBInfo>> dbInfo, Reference<AsyncVar<bool>> degraded, std::string folder)
-		: dbgid(dbgid), instanceID(deterministicRandom()->randomUniqueID().first()),
+	TLogData(UID dbgid, UID workerID, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference<AsyncVar<ServerDBInfo>> dbInfo, Reference<AsyncVar<bool>> degraded, std::string folder)
+		: dbgid(dbgid), workerID(workerID), instanceID(deterministicRandom()->randomUniqueID().first()),

@@ -439,14 +440,15 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
-	explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, int txsTags, UID recruitmentID, std::vector<Tag> tags) : tLogData(tLogData), knownCommittedVersion(0), logId(interf.id()),
+	explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, int txsTags, UID recruitmentID, std::vector<Tag> tags, std::string context)
+		: tLogData(tLogData), knownCommittedVersion(0), logId(interf.id()),

-		startRole(Role::TRANSACTION_LOG, interf.id(), UID());
+		startRole(Role::TRANSACTION_LOG, interf.id(), tLogData->workerID, {{"SharedTLog", tLogData->dbgid.shortString()}}, context);
@@ -1156,7 +1158,7 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
-		Standalone<VectorRef<KeyValueRef>> kvs = wait(
+		Standalone<RangeResultRef> kvs = wait(
			self->persistentData->readRange(KeyRangeRef(
				persistTagMessagesKey(logData->logId, req.tag, req.begin),
				persistTagMessagesKey(logData->logId, req.tag, logData->persistentDataDurableVersion + 1)), SERVER_KNOBS->DESIRED_TOTAL_BYTES, SERVER_KNOBS->DESIRED_TOTAL_BYTES));

@@ -1220,12 +1222,8 @@ ACTOR Future<Void> watchDegraded(TLogData* self) {
-	//This delay is divided into multiple delays to avoid marking the tlog as degraded because of a single SlowTask
-	state int loopCount = 0;
-	while(loopCount < SERVER_KNOBS->TLOG_DEGRADED_DELAY_COUNT) {
-		wait(delay(SERVER_KNOBS->TLOG_DEGRADED_DURATION/SERVER_KNOBS->TLOG_DEGRADED_DELAY_COUNT, TaskPriority::Low));
-		loopCount++;
-	}
+	wait(lowPriorityDelay(SERVER_KNOBS->TLOG_DEGRADED_DURATION));
	TraceEvent(SevWarnAlways, "TLogDegraded", self->dbgid);
	TEST(true); //6.0 TLog degraded
	self->degraded->set(true);

@@ -1478,7 +1476,7 @@ ACTOR Future<Void> rejoinMasters( TLogData* self, TLogInterface tli, DBRecoveryC
	TLogRejoinRequest req(tli);
-	TraceEvent("TLogRejoining", self->dbgid).detail("Master", self->dbInfo->get().master.id());
+	TraceEvent("TLogRejoining", tli.id()).detail("Master", self->dbInfo->get().master.id());
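
Both the 6.0 and current TLog's watchDegraded now call lowPriorityDelay instead of hand-rolling the split wait that this hunk removes. A plausible reading — an assumption, not something this diff states — is that lowPriorityDelay performs the same subdivision internally, so that no single SlowTask can account for the whole degradation window. A standalone model of the removed pattern, with a made-up chunk count standing in for whatever the helper really uses:

	// Standalone model (not FDB code) of the split-delay idea behind the loop
	// removed above: one long wait is divided into many short low-priority waits
	// so a single stall cannot span the whole duration.
	#include <chrono>
	#include <thread>

	void splitDelay(double totalSeconds, int parts) {
		for (int i = 0; i < parts; ++i) {
			// In the real actor code each iteration is a separate delay() at
			// TaskPriority::Low rather than a blocking sleep.
			std::this_thread::sleep_for(std::chrono::duration<double>(totalSeconds / parts));
		}
	}

	int main() {
		splitDelay(1.0, 10);   // ten 100 ms chunks instead of one 1 s wait
	}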
@@ -1927,12 +1925,12 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
	state Future<Optional<Value>> fFormat = storage->readValue(persistFormat.key);
-	state Future<Standalone<VectorRef<KeyValueRef>>> fVers = storage->readRange(persistCurrentVersionKeys);
-	state Future<Standalone<VectorRef<KeyValueRef>>> fKnownCommitted = storage->readRange(persistKnownCommittedVersionKeys);
-	state Future<Standalone<VectorRef<KeyValueRef>>> fLocality = storage->readRange(persistLocalityKeys);
-	state Future<Standalone<VectorRef<KeyValueRef>>> fLogRouterTags = storage->readRange(persistLogRouterTagsKeys);
-	state Future<Standalone<VectorRef<KeyValueRef>>> fTxsTags = storage->readRange(persistTxsTagsKeys);
-	state Future<Standalone<VectorRef<KeyValueRef>>> fRecoverCounts = storage->readRange(persistRecoveryCountKeys);
+	state Future<Standalone<RangeResultRef>> fVers = storage->readRange(persistCurrentVersionKeys);
+	state Future<Standalone<RangeResultRef>> fKnownCommitted = storage->readRange(persistKnownCommittedVersionKeys);
+	state Future<Standalone<RangeResultRef>> fLocality = storage->readRange(persistLocalityKeys);
+	state Future<Standalone<RangeResultRef>> fLogRouterTags = storage->readRange(persistLogRouterTagsKeys);
+	state Future<Standalone<RangeResultRef>> fTxsTags = storage->readRange(persistTxsTagsKeys);
+	state Future<Standalone<RangeResultRef>> fRecoverCounts = storage->readRange(persistRecoveryCountKeys);

@@ -1951,7 +1949,7 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
-		Standalone<VectorRef<KeyValueRef>> v = wait( self->persistentData->readRange( KeyRangeRef(StringRef(), LiteralStringRef("\xff")), 1 ) );
+		Standalone<RangeResultRef> v = wait( self->persistentData->readRange( KeyRangeRef(StringRef(), LiteralStringRef("\xff")), 1 ) );

@@ -1973,7 +1971,7 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
-		wait( oldTLog_4_6::tLog(self->persistentData, self->rawPersistentQueue, self->dbInfo, locality, self->dbgid) );
+		wait( oldTLog_4_6::tLog(self->persistentData, self->rawPersistentQueue, self->dbInfo, locality, self->dbgid, self->workerID) );

@@ -2019,7 +2017,7 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
	//We do not need the remoteTag, because we will not be loading any additional data
-	logData = Reference<LogData>( new LogData(self, recruited, Tag(), true, id_logRouterTags[id1], id_txsTags[id1], UID(), std::vector<Tag>()) );
+	logData = Reference<LogData>( new LogData(self, recruited, Tag(), true, id_logRouterTags[id1], id_txsTags[id1], UID(), std::vector<Tag>(), "Restored") );
@@ -2041,7 +2039,7 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
-		Standalone<VectorRef<KeyValueRef>> data = wait( self->persistentData->readRange( tagKeys, BUGGIFY ? 3 : 1<<30, 1<<20 ) );
+		Standalone<RangeResultRef> data = wait( self->persistentData->readRange( tagKeys, BUGGIFY ? 3 : 1<<30, 1<<20 ) );

@@ -2202,7 +2200,8 @@ ACTOR Future<Void> tLogStart( TLogData* self, InitializeTLogRequest req, Localit
-	state Reference<LogData> logData = Reference<LogData>( new LogData(self, recruited, req.remoteTag, req.isPrimary, req.logRouterTags, req.txsTags, req.recruitmentID, req.allTags) );
+	bool recovering = (req.recoverFrom.logSystemType == LogSystemType::tagPartitioned);
+	state Reference<LogData> logData = Reference<LogData>( new LogData(self, recruited, req.remoteTag, req.isPrimary, req.logRouterTags, req.txsTags, req.recruitmentID, req.allTags, recovering ? "Recovered" : "Recruited") );
	self->id_data[recruited.id()] = logData;

@@ -2218,7 +2217,7 @@ ACTOR Future<Void> tLogStart( TLogData* self, InitializeTLogRequest req, Localit
-	if (req.recoverFrom.logSystemType == LogSystemType::tagPartitioned) {
+	if (recovering) {

@@ -2324,13 +2323,11 @@ ACTOR Future<Void> startSpillingInTenSeconds(TLogData* self, UID tlogId, Referen
	// New tLog (if !recoverFrom.size()) or restore from network
-ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, bool restoreFromDisk, Promise<Void> oldLog, Promise<Void> recovered, std::string folder, Reference<AsyncVar<bool>> degraded, Reference<AsyncVar<UID>> activeSharedTLog) {
-	state TLogData self( tlogId, persistentData, persistentQueue, db, degraded, folder );
+ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, UID workerID, bool restoreFromDisk, Promise<Void> oldLog, Promise<Void> recovered, std::string folder, Reference<AsyncVar<bool>> degraded, Reference<AsyncVar<UID>> activeSharedTLog) {
+	state TLogData self( tlogId, workerID, persistentData, persistentQueue, db, degraded, folder );
	state Future<Void> error = actorCollection( self.sharedActors.getFuture() );

	TraceEvent("SharedTlog", tlogId);
-	// FIXME: Pass the worker id instead of stubbing it
-	startRole(Role::SHARED_TRANSACTION_LOG, tlogId, UID());

@@ -2371,7 +2368,6 @@ ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQ
	TraceEvent("TLogError", tlogId).error(e, true);
-	endRole(Role::SHARED_TRANSACTION_LOG, tlogId, "Error", true);
	if(recovered.canBeSet()) recovered.send(Void());
@@ -295,6 +295,7 @@ struct TLogData : NonCopyable {
	UID dbgid;
+	UID workerID;

@@ -337,8 +338,8 @@ struct TLogData : NonCopyable {
-	TLogData(UID dbgid, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference<AsyncVar<ServerDBInfo>> dbInfo, Reference<AsyncVar<bool>> degraded, std::string folder)
-		: dbgid(dbgid), instanceID(deterministicRandom()->randomUniqueID().first()),
+	TLogData(UID dbgid, UID workerID, IKeyValueStore* persistentData, IDiskQueue * persistentQueue, Reference<AsyncVar<ServerDBInfo>> dbInfo, Reference<AsyncVar<bool>> degraded, std::string folder)
+		: dbgid(dbgid), workerID(workerID), instanceID(deterministicRandom()->randomUniqueID().first()),

@@ -499,15 +500,16 @@ struct LogData : NonCopyable, public ReferenceCounted<LogData> {
-	explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, int txsTags, UID recruitmentID, ProtocolVersion protocolVersion, std::vector<Tag> tags) : tLogData(tLogData), knownCommittedVersion(0), logId(interf.id()),
+	explicit LogData(TLogData* tLogData, TLogInterface interf, Tag remoteTag, bool isPrimary, int logRouterTags, int txsTags, UID recruitmentID, ProtocolVersion protocolVersion, std::vector<Tag> tags, std::string context)
+		: tLogData(tLogData), knownCommittedVersion(0), logId(interf.id()),

-		startRole(Role::TRANSACTION_LOG, interf.id(), UID());
+		startRole(Role::TRANSACTION_LOG, interf.id(), tLogData->workerID, {{"SharedTLog", tLogData->dbgid.shortString()}}, context);
@@ -683,7 +685,7 @@ ACTOR Future<Void> updatePoppedLocation( TLogData* self, Reference<LogData> logD
	if (data->popped <= logData->persistentDataVersion) {
		// Recover the next needed location in the Disk Queue from the index.
-		Standalone<VectorRef<KeyValueRef>> kvrefs = wait(
+		Standalone<RangeResultRef> kvrefs = wait(
			self->persistentData->readRange(KeyRangeRef(
				persistTagMessageRefsKey(logData->logId, data->tag, data->popped),
				persistTagMessageRefsKey(logData->logId, data->tag, logData->persistentDataVersion + 1)), 1));

@@ -1461,7 +1463,7 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
	if (req.tag.locality == tagLocalityTxs || req.tag == txsTag) {
-		Standalone<VectorRef<KeyValueRef>> kvs = wait(
+		Standalone<RangeResultRef> kvs = wait(
			self->persistentData->readRange(KeyRangeRef(
				persistTagMessagesKey(logData->logId, req.tag, req.begin),
				persistTagMessagesKey(logData->logId, req.tag, logData->persistentDataDurableVersion + 1)), SERVER_KNOBS->DESIRED_TOTAL_BYTES, SERVER_KNOBS->DESIRED_TOTAL_BYTES));

@@ -1480,7 +1482,7 @@ ACTOR Future<Void> tLogPeekMessages( TLogData* self, TLogPeekRequest req, Refere
	} else {
		// FIXME: Limit to approximately DESIRED_TOTATL_BYTES somehow.
-		Standalone<VectorRef<KeyValueRef>> kvrefs = wait(
+		Standalone<RangeResultRef> kvrefs = wait(
			self->persistentData->readRange(KeyRangeRef(
				persistTagMessageRefsKey(logData->logId, req.tag, req.begin),
				persistTagMessageRefsKey(logData->logId, req.tag, logData->persistentDataDurableVersion + 1)),

@@ -1611,12 +1613,8 @@ ACTOR Future<Void> watchDegraded(TLogData* self) {
-	//This delay is divided into multiple delays to avoid marking the tlog as degraded because of a single SlowTask
-	state int loopCount = 0;
-	while(loopCount < SERVER_KNOBS->TLOG_DEGRADED_DELAY_COUNT) {
-		wait(delay(SERVER_KNOBS->TLOG_DEGRADED_DURATION/SERVER_KNOBS->TLOG_DEGRADED_DELAY_COUNT, TaskPriority::Low));
-		loopCount++;
-	}
+	wait(lowPriorityDelay(SERVER_KNOBS->TLOG_DEGRADED_DURATION));
	TraceEvent(SevWarnAlways, "TLogDegraded", self->dbgid);
	TEST(true); //TLog degraded
	self->degraded->set(true);
@@ -1870,7 +1868,7 @@ ACTOR Future<Void> rejoinMasters( TLogData* self, TLogInterface tli, DBRecoveryC
	TLogRejoinRequest req(tli);
-	TraceEvent("TLogRejoining", self->dbgid).detail("Master", self->dbInfo->get().master.id());
+	TraceEvent("TLogRejoining", tli.id()).detail("Master", self->dbInfo->get().master.id());

@@ -2333,13 +2331,13 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
	state Future<Optional<Value>> fRecoveryLocation = storage->readValue(persistRecoveryLocationKey);
-	state Future<Standalone<VectorRef<KeyValueRef>>> fVers = storage->readRange(persistCurrentVersionKeys);
-	state Future<Standalone<VectorRef<KeyValueRef>>> fKnownCommitted = storage->readRange(persistKnownCommittedVersionKeys);
-	state Future<Standalone<VectorRef<KeyValueRef>>> fLocality = storage->readRange(persistLocalityKeys);
-	state Future<Standalone<VectorRef<KeyValueRef>>> fLogRouterTags = storage->readRange(persistLogRouterTagsKeys);
-	state Future<Standalone<VectorRef<KeyValueRef>>> fTxsTags = storage->readRange(persistTxsTagsKeys);
-	state Future<Standalone<VectorRef<KeyValueRef>>> fRecoverCounts = storage->readRange(persistRecoveryCountKeys);
-	state Future<Standalone<VectorRef<KeyValueRef>>> fProtocolVersions = storage->readRange(persistProtocolVersionKeys);
+	state Future<Standalone<RangeResultRef>> fVers = storage->readRange(persistCurrentVersionKeys);
+	state Future<Standalone<RangeResultRef>> fKnownCommitted = storage->readRange(persistKnownCommittedVersionKeys);
+	state Future<Standalone<RangeResultRef>> fLocality = storage->readRange(persistLocalityKeys);
+	state Future<Standalone<RangeResultRef>> fLogRouterTags = storage->readRange(persistLogRouterTagsKeys);
+	state Future<Standalone<RangeResultRef>> fTxsTags = storage->readRange(persistTxsTagsKeys);
+	state Future<Standalone<RangeResultRef>> fRecoverCounts = storage->readRange(persistRecoveryCountKeys);
+	state Future<Standalone<RangeResultRef>> fProtocolVersions = storage->readRange(persistProtocolVersionKeys);

@@ -2358,7 +2356,7 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
-		Standalone<VectorRef<KeyValueRef>> v = wait( self->persistentData->readRange( KeyRangeRef(StringRef(), LiteralStringRef("\xff")), 1 ) );
+		Standalone<RangeResultRef> v = wait( self->persistentData->readRange( KeyRangeRef(StringRef(), LiteralStringRef("\xff")), 1 ) );

@@ -2424,7 +2422,7 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
	//We do not need the remoteTag, because we will not be loading any additional data
-	logData = Reference<LogData>( new LogData(self, recruited, Tag(), true, id_logRouterTags[id1], id_txsTags[id1], UID(), protocolVersion, std::vector<Tag>()) );
+	logData = Reference<LogData>( new LogData(self, recruited, Tag(), true, id_logRouterTags[id1], id_txsTags[id1], UID(), protocolVersion, std::vector<Tag>(), "Restored") );

@@ -2446,7 +2444,7 @@ ACTOR Future<Void> restorePersistentState( TLogData* self, LocalityData locality
-		Standalone<VectorRef<KeyValueRef>> data = wait( self->persistentData->readRange( tagKeys, BUGGIFY ? 3 : 1<<30, 1<<20 ) );
+		Standalone<RangeResultRef> data = wait( self->persistentData->readRange( tagKeys, BUGGIFY ? 3 : 1<<30, 1<<20 ) );
@@ -2631,7 +2629,8 @@ ACTOR Future<Void> tLogStart( TLogData* self, InitializeTLogRequest req, Localit
	stopAllTLogs(self, recruited.id());

-	state Reference<LogData> logData = Reference<LogData>( new LogData(self, recruited, req.remoteTag, req.isPrimary, req.logRouterTags, req.txsTags, req.recruitmentID, currentProtocolVersion, req.allTags) );
+	bool recovering = (req.recoverFrom.logSystemType == LogSystemType::tagPartitioned);
+	state Reference<LogData> logData = Reference<LogData>( new LogData(self, recruited, req.remoteTag, req.isPrimary, req.logRouterTags, req.txsTags, req.recruitmentID, currentProtocolVersion, req.allTags, recovering ? "Recovered" : "Recruited") );
	self->id_data[recruited.id()] = logData;

@@ -2649,7 +2648,7 @@ ACTOR Future<Void> tLogStart( TLogData* self, InitializeTLogRequest req, Localit
-	if (req.recoverFrom.logSystemType == LogSystemType::tagPartitioned) {
+	if (recovering) {

@@ -2758,13 +2757,11 @@ ACTOR Future<Void> startSpillingInTenSeconds(TLogData* self, UID tlogId, Referen
	// New tLog (if !recoverFrom.size()) or restore from network
-ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, bool restoreFromDisk, Promise<Void> oldLog, Promise<Void> recovered, std::string folder, Reference<AsyncVar<bool>> degraded, Reference<AsyncVar<UID>> activeSharedTLog ) {
-	state TLogData self( tlogId, persistentData, persistentQueue, db, degraded, folder );
+ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQueue, Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, UID workerID, bool restoreFromDisk, Promise<Void> oldLog, Promise<Void> recovered, std::string folder, Reference<AsyncVar<bool>> degraded, Reference<AsyncVar<UID>> activeSharedTLog ) {
+	state TLogData self( tlogId, workerID, persistentData, persistentQueue, db, degraded, folder );
	state Future<Void> error = actorCollection( self.sharedActors.getFuture() );

	TraceEvent("SharedTlog", tlogId);
-	// FIXME: Pass the worker id instead of stubbing it
-	startRole(Role::SHARED_TRANSACTION_LOG, tlogId, UID());

@@ -2808,7 +2805,6 @@ ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQ
	TraceEvent("TLogError", tlogId).error(e, true);
-	endRole(Role::SHARED_TRANSACTION_LOG, tlogId, "Error", true);
	if(recovered.canBeSet()) recovered.send(Void());
@@ -4859,22 +4859,26 @@ public:
-	Future< Standalone< VectorRef< KeyValueRef > > > readRange(KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30) {
+	Future< Standalone< RangeResultRef > > readRange(KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30) {
		debug_printf("READRANGE %s\n", printable(keys).c_str());
		return catchError(readRange_impl(this, keys, rowLimit, byteLimit));
	}

-	ACTOR static Future< Standalone< VectorRef< KeyValueRef > > > readRange_impl(KeyValueStoreRedwoodUnversioned *self, KeyRange keys, int rowLimit, int byteLimit) {
+	ACTOR static Future< Standalone< RangeResultRef > > readRange_impl(KeyValueStoreRedwoodUnversioned *self, KeyRange keys, int rowLimit, int byteLimit) {
		self->m_tree->counts.getRanges++;
-		state Standalone<VectorRef<KeyValueRef>> result;
+		state Standalone<RangeResultRef> result;
		state int accumulatedBytes = 0;
		ASSERT( byteLimit > 0 );
+		if(rowLimit == 0) {
+			return result;
+		}
		state Reference<IStoreCursor> cur = self->m_tree->readAtVersion(self->m_tree->getLastCommittedVersion());
		// Prefetch is currently only done in the forward direction
		state int prefetchBytes = rowLimit > 1 ? byteLimit : 0;
-		if(rowLimit >= 0) {
+		if(rowLimit > 0) {
			wait(cur->findFirstEqualOrGreater(keys.begin, prefetchBytes));
			while(cur->isValid() && cur->getKey() < keys.end) {
				KeyValueRef kv(KeyRef(result.arena(), cur->getKey()), ValueRef(result.arena(), cur->getValue()));

@@ -4900,6 +4904,12 @@ public:
				wait(cur->prev());
			}
		}
+		result.more = rowLimit == 0 || accumulatedBytes >= byteLimit;
+		if(result.more) {
+			ASSERT(result.size() > 0);
+			result.readThrough = result[result.size()-1].key;
+		}
		return result;
	}
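
With these hunks, the Redwood readRange reports partial reads through the RangeResultRef fields it now fills in: more is set when a limit cut the scan short, and readThrough records the last key included. A standalone sketch of how a caller could resume from readThrough — the result type and resume rule here are modeled for illustration, not taken verbatim from the FDB API:

	// Standalone sketch (not FDB code) of consuming a paginated range result:
	// if more is set, the next read starts strictly after readThrough.
	#include <string>
	#include <utility>
	#include <vector>

	struct RangeResult {
		std::vector<std::pair<std::string, std::string>> kv;
		bool more = false;
		std::string readThrough;               // last key included when more == true
	};

	static const std::vector<std::pair<std::string, std::string>> rows = {
		{"a", "1"}, {"b", "2"}, {"c", "3"}, {"d", "4"}, {"e", "5"}};

	// Stand-in for readRange(begin, ..., rowLimit): returns at most rowLimit rows
	// with key >= begin and sets more/readThrough when it stops early.
	RangeResult readSome(const std::string& begin, size_t rowLimit) {
		RangeResult r;
		for (const auto& kv : rows) {
			if (kv.first < begin) continue;
			if (r.kv.size() == rowLimit) { r.more = true; break; }
			r.kv.push_back(kv);
		}
		if (r.more) r.readThrough = r.kv.back().first;
		return r;
	}

	int main() {
		std::string begin = "a";
		for (;;) {
			RangeResult r = readSome(begin, 2);
			// ... process r.kv ...
			if (!r.more) break;
			begin = r.readThrough + '\0';      // resume just past the last key read
		}
	}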
@@ -418,7 +418,7 @@ private:
-void startRole(const Role &role, UID roleId, UID workerId, std::map<std::string, std::string> details = std::map<std::string, std::string>(), std::string origination = "Recruited");
+void startRole(const Role &role, UID roleId, UID workerId, const std::map<std::string, std::string> &details = std::map<std::string, std::string>(), const std::string &origination = "Recruited");
void endRole(const Role &role, UID id, std::string reason, bool ok = true, Error e = Error());

@@ -455,8 +455,8 @@ ACTOR Future<Void> masterProxyServer(MasterProxyInterface proxy, InitializeMaste
ACTOR Future<Void> tLog(IKeyValueStore* persistentData, IDiskQueue* persistentQueue,
                        Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality,
-                       PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, bool restoreFromDisk,
-                       Promise<Void> oldLog, Promise<Void> recovered, std::string folder,
+                       PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, UID workerID,
+                       bool restoreFromDisk, Promise<Void> oldLog, Promise<Void> recovered, std::string folder,
                        Reference<AsyncVar<bool>> degraded, Reference<AsyncVar<UID>> activeSharedTLog);

@@ -474,13 +474,13 @@ void updateCpuProfiler(ProfilerRequest req);
namespace oldTLog_4_6 {
	ACTOR Future<Void> tLog(IKeyValueStore* persistentData, IDiskQueue* persistentQueue,
-	                        Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, UID tlogId);
+	                        Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality, UID tlogId, UID workerID);
}
namespace oldTLog_6_0 {
	ACTOR Future<Void> tLog(IKeyValueStore* persistentData, IDiskQueue* persistentQueue,
	                        Reference<AsyncVar<ServerDBInfo>> db, LocalityData locality,
-	                        PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, bool restoreFromDisk,
-	                        Promise<Void> oldLog, Promise<Void> recovered, std::string folder,
+	                        PromiseStream<InitializeTLogRequest> tlogRequests, UID tlogId, UID workerID,
+	                        bool restoreFromDisk, Promise<Void> oldLog, Promise<Void> recovered, std::string folder,
	                        Reference<AsyncVar<bool>> degraded, Reference<AsyncVar<UID>> activeSharedTLog);
}
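
With the widened startRole declaration above, every role registration touched by this commit carries the owning worker's ID plus optional details and an origination string, and the defaulted map and string are now taken by const reference. A standalone model of that call shape — the enum, UID alias, and detail keys below are illustrative stand-ins, not FDB's definitions:

	// Standalone model (not FDB code) of the reworked startRole signature:
	// const-reference parameters with defaulted details/origination, so plain
	// callers pay for no map copy while TLog-style callers can attach a
	// "SharedTLog" detail and a context such as "Restored" or "Recruited".
	#include <iostream>
	#include <map>
	#include <string>

	enum class Role { TransactionLog, SharedTransactionLog };
	using UID = std::string;   // stand-in for FDB's UID

	void startRole(const Role& role, const UID& roleId, const UID& workerId,
	               const std::map<std::string, std::string>& details = std::map<std::string, std::string>(),
	               const std::string& origination = "Recruited") {
		std::cout << "role " << static_cast<int>(role) << " " << roleId
		          << " on worker " << workerId << " (" << origination << ")\n";
		for (const auto& d : details) std::cout << "  " << d.first << "=" << d.second << "\n";
	}

	int main() {
		startRole(Role::TransactionLog, "tlog-1", "worker-7",
		          {{"SharedTLog", "abcd1234"}}, "Restored");
		startRole(Role::SharedTransactionLog, "shared-1", "worker-7");   // defaults apply
	}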
@@ -665,8 +665,8 @@ ACTOR Future<Void> readTransactionSystemState( Reference<MasterData> self, Refer
-	Standalone<VectorRef<KeyValueRef>> rawConf = wait( self->txnStateStore->readRange( configKeys ) );
-	self->configuration.fromKeyValues( rawConf );
+	Standalone<RangeResultRef> rawConf = wait( self->txnStateStore->readRange( configKeys ) );
+	self->configuration.fromKeyValues( rawConf.castTo<VectorRef<KeyValueRef>>() );
	self->originalConfiguration = self->configuration;
	self->hasConfiguration = true;

@@ -676,13 +676,13 @@ ACTOR Future<Void> readTransactionSystemState( Reference<MasterData> self, Refer
-	Standalone<VectorRef<KeyValueRef>> rawLocalities = wait( self->txnStateStore->readRange( tagLocalityListKeys ) );
+	Standalone<RangeResultRef> rawLocalities = wait( self->txnStateStore->readRange( tagLocalityListKeys ) );
	self->dcId_locality.clear();
	for(auto& kv : rawLocalities) {
		self->dcId_locality[decodeTagLocalityListKey(kv.key)] = decodeTagLocalityListValue(kv.value);
	}

-	Standalone<VectorRef<KeyValueRef>> rawTags = wait( self->txnStateStore->readRange( serverTagKeys ) );
+	Standalone<RangeResultRef> rawTags = wait( self->txnStateStore->readRange( serverTagKeys ) );
	self->allTags.clear();

@@ -699,7 +699,7 @@ ACTOR Future<Void> readTransactionSystemState( Reference<MasterData> self, Refer
-	Standalone<VectorRef<KeyValueRef>> rawHistoryTags = wait( self->txnStateStore->readRange( serverTagHistoryKeys ) );
+	Standalone<RangeResultRef> rawHistoryTags = wait( self->txnStateStore->readRange( serverTagHistoryKeys ) );
	for(auto& kv : rawHistoryTags) {
		self->allTags.push_back(decodeServerTagValue( kv.value ));
	}

@@ -722,13 +722,13 @@ ACTOR Future<Void> sendInitialCommitToResolvers( Reference<MasterData> self ) {
	ASSERT(self->recoveryTransactionVersion);
-	state Standalone<VectorRef<KeyValueRef>> data = self->txnStateStore->readRange(txnKeys, BUGGIFY ? 3 : SERVER_KNOBS->DESIRED_TOTAL_BYTES, SERVER_KNOBS->DESIRED_TOTAL_BYTES).get();
+	state Standalone<RangeResultRef> data = self->txnStateStore->readRange(txnKeys, BUGGIFY ? 3 : SERVER_KNOBS->DESIRED_TOTAL_BYTES, SERVER_KNOBS->DESIRED_TOTAL_BYTES).get();
	state vector<Future<Void>> txnReplies;
	state int64_t dataOutstanding = 0;
	loop {
		if(!data.size()) break;
		((KeyRangeRef&)txnKeys) = KeyRangeRef( keyAfter(data.back().key, txnKeys.arena()), txnKeys.end );
-		Standalone<VectorRef<KeyValueRef>> nextData = self->txnStateStore->readRange(txnKeys, BUGGIFY ? 3 : SERVER_KNOBS->DESIRED_TOTAL_BYTES, SERVER_KNOBS->DESIRED_TOTAL_BYTES).get();
+		Standalone<RangeResultRef> nextData = self->txnStateStore->readRange(txnKeys, BUGGIFY ? 3 : SERVER_KNOBS->DESIRED_TOTAL_BYTES, SERVER_KNOBS->DESIRED_TOTAL_BYTES).get();
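
The pattern throughout these hunks is that readRange callers keep iterating the result exactly as before, and only the one place that hands the rows to an API expecting a plain VectorRef (fromKeyValues) adds an explicit castTo call. That is consistent with RangeResultRef behaving as a vector of key-value pairs plus pagination metadata, as the Redwood change suggests (more/readThrough). A standalone model of that relationship — not the real FDB types:

	// Standalone model (not FDB code): a range result that is-a vector of rows
	// plus pagination metadata, so iteration code is unchanged and only
	// "give me just the rows" callers need an explicit narrowing step
	// (analogous in spirit to castTo<VectorRef<KeyValueRef>>()).
	#include <string>
	#include <vector>

	struct KV { std::string key, value; };

	struct RangeResult : std::vector<KV> {
		bool more = false;            // true if the read stopped at a limit
		std::string readThrough;      // resume point when more == true
		const std::vector<KV>& rows() const { return *this; }  // explicit plain view
	};

	int main() {
		RangeResult r;
		r.push_back({"conf/a", "1"});
		for (const auto& kv : r) { (void)kv; }      // existing loops keep working
		const std::vector<KV>& plain = r.rows();    // narrowing where an API demands it
		(void)plain;
	}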
@@ -185,7 +185,7 @@ struct StorageServerDisk {
 Future<Key> readNextKeyInclusive( KeyRef key ) { return readFirstKey(storage, KeyRangeRef(key, allKeys.end)); }
 Future<Optional<Value>> readValue( KeyRef key, Optional<UID> debugID = Optional<UID>() ) { return storage->readValue(key, debugID); }
 Future<Optional<Value>> readValuePrefix( KeyRef key, int maxLength, Optional<UID> debugID = Optional<UID>() ) { return storage->readValuePrefix(key, maxLength, debugID); }
-Future<Standalone<VectorRef<KeyValueRef>>> readRange( KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30 ) { return storage->readRange(keys, rowLimit, byteLimit); }
+Future<Standalone<RangeResultRef>> readRange( KeyRangeRef keys, int rowLimit = 1<<30, int byteLimit = 1<<30 ) { return storage->readRange(keys, rowLimit, byteLimit); }

 KeyValueStoreType getKeyValueStoreType() { return storage->getType(); }
 StorageBytes getStorageBytes() { return storage->getStorageBytes(); }

@@ -197,7 +197,7 @@ private:
 void writeMutations( MutationListRef mutations, Version debugVersion, const char* debugContext );

 ACTOR static Future<Key> readFirstKey( IKeyValueStore* storage, KeyRangeRef range ) {
-Standalone<VectorRef<KeyValueRef>> r = wait( storage->readRange( range, 1 ) );
+Standalone<RangeResultRef> r = wait( storage->readRange( range, 1 ) );
 if (r.size()) return r[0].key;
 else return range.end;
 }
@@ -1045,17 +1045,19 @@ void merge( Arena& arena, VectorRef<KeyValueRef, VecSerStrategy::String>& output
 // Combines data from base (at an older version) with sets from newer versions in [start, end) and appends the first (up to) |limit| rows to output
 // If limit<0, base and output are in descending order, and start->key()>end->key(), but start is still inclusive and end is exclusive
 {
-if (limit==0) return;
-int originalLimit = abs(limit) + output.size();
+ASSERT(limit != 0);
 bool forward = limit>0;
 if (!forward) limit = -limit;
+int adjustedLimit = limit + output.size();
 int accumulatedBytes = 0;

 KeyValueRef const* baseStart = base.begin();
 KeyValueRef const* baseEnd = base.end();
-while (baseStart!=baseEnd && start!=end && --limit>=0 && accumulatedBytes < limitBytes) {
+while (baseStart!=baseEnd && start!=end && output.size() < adjustedLimit && accumulatedBytes < limitBytes) {
-if (forward ? baseStart->key < start.key() : baseStart->key > start.key())
+if (forward ? baseStart->key < start.key() : baseStart->key > start.key()) {
 output.push_back_deep( arena, *baseStart++ );
+}
 else {
 output.push_back_deep( arena, KeyValueRef(start.key(), start->getValue()) );
 if (baseStart->key == start.key()) ++baseStart;

@@ -1063,18 +1065,17 @@ void merge( Arena& arena, VectorRef<KeyValueRef, VecSerStrategy::String>& output
 }
 accumulatedBytes += sizeof(KeyValueRef) + output.end()[-1].expectedSize();
 }
-while (baseStart!=baseEnd && --limit>=0 && accumulatedBytes < limitBytes) {
+while (baseStart!=baseEnd && output.size() < adjustedLimit && accumulatedBytes < limitBytes) {
 output.push_back_deep( arena, *baseStart++ );
 accumulatedBytes += sizeof(KeyValueRef) + output.end()[-1].expectedSize();
 }
 if( !stopAtEndOfBase ) {
-while (start!=end && --limit>=0 && accumulatedBytes < limitBytes) {
+while (start!=end && output.size() < adjustedLimit && accumulatedBytes < limitBytes) {
 output.push_back_deep( arena, KeyValueRef(start.key(), start->getValue()) );
 accumulatedBytes += sizeof(KeyValueRef) + output.end()[-1].expectedSize();
 if (forward) ++start; else --start;
 }
 }
-ASSERT( output.size() <= originalLimit );
 }

 // readRange reads up to |limit| rows from the given range and version, combining data->storage and data->versionedData.
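The change to merge() above replaces the in-place `--limit>=0` countdown (and the originalLimit assertion) with a precomputed target size, adjustedLimit = limit + output.size(), that all three loops compare against. Below is a self-contained sketch of the same bounded-merge pattern; it models only the forward case, ignores stopAtEndOfBase, and all names and signatures are illustrative rather than the actual FDB ones.

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

struct KV { std::string key, value; };

// Simplified model of the merge() pattern in the diff: append rows from two
// already-sorted sources to `output`, never letting output grow past `limit`
// additional rows or `limitBytes` additional bytes.
void mergeLimited(std::vector<KV>& output, const std::vector<KV>& base,
                  const std::vector<KV>& updates, int limit, int limitBytes) {
    // As in the new code, express the row budget as a target output size rather
    // than decrementing `limit` inside every loop condition.
    size_t adjustedLimit = output.size() + (size_t)limit;
    int accumulatedBytes = 0;
    size_t b = 0, u = 0;
    auto push = [&](const KV& kv) {
        output.push_back(kv);
        accumulatedBytes += (int)(kv.key.size() + kv.value.size());
    };
    while (b < base.size() && u < updates.size() &&
           output.size() < adjustedLimit && accumulatedBytes < limitBytes) {
        if (base[b].key < updates[u].key) {
            push(base[b++]);
        } else {
            if (base[b].key == updates[u].key) ++b;  // the newer row shadows the base row
            push(updates[u++]);
        }
    }
    while (b < base.size() && output.size() < adjustedLimit && accumulatedBytes < limitBytes)
        push(base[b++]);
    while (u < updates.size() && output.size() < adjustedLimit && accumulatedBytes < limitBytes)
        push(updates[u++]);
}

int main() {
    std::vector<KV> out;
    mergeLimited(out, {{"a", "base"}, {"c", "base"}}, {{"b", "new"}, {"c", "new"}}, 3, 1 << 20);
    for (auto& kv : out) std::cout << kv.key << "=" << kv.value << "\n";  // a=base, b=new, c=new
}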
@@ -1089,14 +1090,8 @@ ACTOR Future<GetKeyValuesReply> readRange( StorageServer* data, Version version,
 state KeyRef readEnd;
 state Key readBeginTemp;
 state int vCount;
-//state UID rrid = deterministicRandom()->randomUniqueID();
-//state int originalLimit = limit;
-//state int originalLimitBytes = *pLimitBytes;
-//state bool track = rrid.first() == 0x1bc134c2f752187cLL;

-// FIXME: Review pLimitBytes behavior
 // if (limit >= 0) we are reading forward, else backward

 if (limit >= 0) {
 // We might care about a clear beginning before start that
 // runs into range

@@ -1108,20 +1103,7 @@ ACTOR Future<GetKeyValuesReply> readRange( StorageServer* data, Version version,

 vStart = view.lower_bound(readBegin);

-/*if (track) {
-printf("readRange(%llx, @%lld, '%s'-'%s')\n", data->thisServerID.first(), version, printable(range.begin).c_str(), printable(range.end).c_str());
-printf("mvcc:\n");
-vEnd = view.upper_bound(range.end);
-for(auto r=vStart; r != vEnd; ++r) {
-if (r->isClearTo())
-printf(" '%s'-'%s' cleared\n", printable(r.key()).c_str(), printable(r->getEndKey()).c_str());
-else
-printf(" '%s' := '%s'\n", printable(r.key()).c_str(), printable(r->getValue()).c_str());
-}
-}*/

 while (limit>0 && *pLimitBytes>0 && readBegin < range.end) {
-// ASSERT( vStart == view.lower_bound(readBegin) );
 ASSERT( !vStart || vStart.key() >= readBegin );
 if (vStart) { auto b = vStart; --b; ASSERT( !b || b.key() < readBegin ); }
 ASSERT( data->storageVersion() <= version );

@@ -1138,93 +1120,58 @@ ACTOR Future<GetKeyValuesReply> readRange( StorageServer* data, Version version,

 // Read the data on disk up to vEnd (or the end of the range)
 readEnd = vEnd ? std::min( vEnd.key(), range.end ) : range.end;
-Standalone<VectorRef<KeyValueRef>> atStorageVersion = wait(
+Standalone<RangeResultRef> atStorageVersion = wait(
 data->storage.readRange( KeyRangeRef(readBegin, readEnd), limit, *pLimitBytes ) );

-/*if (track) {
-printf("read [%s,%s): %d rows\n", printable(readBegin).c_str(), printable(readEnd).c_str(), atStorageVersion.size());
-for(auto r=atStorageVersion.begin(); r != atStorageVersion.end(); ++r)
-printf(" '%s' := '%s'\n", printable(r->key).c_str(), printable(r->value).c_str());
-}*/

 ASSERT( atStorageVersion.size() <= limit );
 if (data->storageVersion() > version) throw transaction_too_old();

-bool more = atStorageVersion.size()!=0;
-
-// merge the sets in [vStart,vEnd) with the sets on disk, stopping at the last key from disk if there is 'more'
+// merge the sets in [vStart,vEnd) with the sets on disk, stopping at the last key from disk if we were limited
 int prevSize = result.data.size();
-merge( result.arena, result.data, atStorageVersion, vStart, vEnd, vCount, limit, more, *pLimitBytes );
+merge( result.arena, result.data, atStorageVersion, vStart, vEnd, vCount, limit, atStorageVersion.more, *pLimitBytes );
 limit -= result.data.size() - prevSize;

 for (auto i = result.data.begin() + prevSize; i != result.data.end(); i++)
 *pLimitBytes -= sizeof(KeyValueRef) + i->expectedSize();

-// Setup for the next iteration
-if (more) { // if there might be more data, begin reading right after what we already found to find out
-//if (track) printf("more\n");
-if (!(limit<=0 || *pLimitBytes<=0 || result.data.end()[-1].key == atStorageVersion.end()[-1].key))
-TraceEvent(SevError, "ReadRangeIssue", data->thisServerID).detail("ReadBegin", readBegin).detail("ReadEnd", readEnd)
-.detail("VStart", vStart ? vStart.key() : LiteralStringRef("nil")).detail("VEnd", vEnd ? vEnd.key() : LiteralStringRef("nil"))
-.detail("AtStorageVersionBack", atStorageVersion.end()[-1].key).detail("ResultBack", result.data.end()[-1].key)
-.detail("Limit", limit).detail("LimitBytes", *pLimitBytes).detail("ResultSize", result.data.size()).detail("PrevSize", prevSize);
-readBegin = readBeginTemp = keyAfter( result.data.end()[-1].key );
-ASSERT( limit<=0 || *pLimitBytes<=0 || result.data.end()[-1].key == atStorageVersion.end()[-1].key );
-} else if (vStart && vStart->isClearTo()){ // if vStart is a clear, skip it.
-//if (track) printf("skip clear\n");
-readBegin = vStart->getEndKey(); // next disk read should start at the end of the clear
-++vStart;
-} else { // Otherwise, continue at readEnd
-//if (track) printf("continue\n");
-readBegin = readEnd;
-}
-}
-// all but the last item are less than *pLimitBytes
-ASSERT( result.data.size() == 0 || *pLimitBytes + result.data.end()[-1].expectedSize() + sizeof(KeyValueRef) > 0 );
-/*if (*pLimitBytes <= 0)
-TraceEvent(SevWarn, "ReadRangeLimitExceeded")
-.detail("Version", version)
-.detail("Begin", range.begin )
-.detail("End", range.end )
-.detail("LimitReamin", limit)
-.detail("LimitBytesRemain", *pLimitBytes); */
+if (limit <=0 || *pLimitBytes <= 0) {
+break;
+}

-/*GetKeyValuesReply correct = wait( readRangeOld(data, version, range, originalLimit, originalLimitBytes) );
-bool prefix_equal = true;
-int totalsize = 0;
-int first_difference = -1;
-for(int i=0; i<result.data.size() && i<correct.data.size(); i++) {
-if (result.data[i] != correct.data[i]) {
-first_difference = i;
-prefix_equal = false;
+// If we hit our limits reading from disk but then combining with MVCC gave us back more room
+if (atStorageVersion.more) {
+ASSERT(result.data.end()[-1].key == atStorageVersion.end()[-1].key);
+readBegin = readBeginTemp = keyAfter(result.data.end()[-1].key);
+} else if (vEnd && vEnd->isClearTo()) {
+ASSERT(vStart == vEnd); // vStart will have been advanced by merge()
+ASSERT(vEnd->getEndKey() > readBegin);
+readBegin = vEnd->getEndKey();
+ ++vStart;
+} else {
+ASSERT(readEnd == range.end);
 break;
 }
-totalsize += result.data[i].expectedSize() + sizeof(KeyValueRef);
 }

-// for the following check
-result.more = limit == 0 || *pLimitBytes<=0; // FIXME: Does this have to be exact?
-result.version = version;
-if ( !(totalsize>originalLimitBytes ? prefix_equal : result.data==correct.data) || correct.more != result.more ) {
-TraceEvent(SevError, "IncorrectResult", rrid).detail("Server", data->thisServerID).detail("CorrectRows", correct.data.size())
-.detail("FirstDifference", first_difference).detail("OriginalLimit", originalLimit)
-.detail("ResultRows", result.data.size()).detail("Result0", result.data[0].key).detail("Correct0", correct.data[0].key)
-.detail("ResultN", result.data.size() ? result.data[std::min(correct.data.size(),result.data.size())-1].key : "nil")
-.detail("CorrectN", correct.data.size() ? correct.data[std::min(correct.data.size(),result.data.size())-1].key : "nil");
-}*/
 } else {
-// Reverse read - abandon hope alle ye who enter here
+vStart = view.lastLess(range.end);
-readEnd = range.end;

-vStart = view.lastLess(readEnd);

 // A clear might extend all the way to range.end
-if (vStart && vStart->isClearTo() && vStart->getEndKey() >= readEnd) {
+if (vStart && vStart->isClearTo() && vStart->getEndKey() >= range.end) {
 readEnd = vStart.key();
 --vStart;
+} else {
+readEnd = range.end;
 }

 while (limit < 0 && *pLimitBytes > 0 && readEnd > range.begin) {
+ASSERT(!vStart || vStart.key() < readEnd);
+if (vStart) {
+auto b = vStart;
+ ++b;
+ASSERT(!b || b.key() >= readEnd);
+}
+ASSERT(data->storageVersion() <= version);

 vEnd = vStart;
 vCount = 0;
 int vSize=0;
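In the rewritten forward path above, the continuation logic keys off atStorageVersion.more: if the disk read was cut short by the row or byte limit, the next iteration resumes just after the last key already emitted; otherwise the loop skips over a clear or stops at the end of the range. The following self-contained sketch shows that resume-after-last-key loop over a toy in-memory store; the types and helpers are illustrative, not the FDB ones.

#include <iostream>
#include <string>
#include <vector>

struct KV { std::string key, value; };
struct RangeResult { std::vector<KV> data; bool more = false; };

// Toy chunked reader: returns at most rowLimit rows from [begin, end) and sets
// `more` when it stopped because of the limit (mirroring how the diff uses
// atStorageVersion.more to decide whether to keep looping).
RangeResult readChunk(const std::vector<KV>& store, const std::string& begin,
                      const std::string& end, int rowLimit) {
    RangeResult r;
    for (const auto& kv : store) {
        if (kv.key < begin || kv.key >= end) continue;
        if ((int)r.data.size() == rowLimit) { r.more = true; break; }
        r.data.push_back(kv);
    }
    return r;
}

// keyAfter: the smallest key strictly greater than k (append a 0 byte).
std::string keyAfter(std::string k) { k.push_back('\0'); return k; }

int main() {
    std::vector<KV> store = {{"a","1"}, {"b","2"}, {"c","3"}, {"d","4"}, {"e","5"}};
    std::vector<KV> result;
    std::string readBegin = "a", readEnd = "z";
    while (true) {
        RangeResult chunk = readChunk(store, readBegin, readEnd, 2);
        result.insert(result.end(), chunk.data.begin(), chunk.data.end());
        if (!chunk.more) break;                   // range exhausted
        readBegin = keyAfter(result.back().key);  // resume just after the last key returned
    }
    std::cout << result.size() << " rows\n";  // 5 rows
}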
@@ -1234,30 +1181,42 @@ ACTOR Future<GetKeyValuesReply> readRange( StorageServer* data, Version version,
 --vEnd;
 }

-readBegin = range.begin;
+readBegin = vEnd ? std::max(vEnd->isClearTo() ? vEnd->getEndKey() : vEnd.key(), range.begin) : range.begin;
-if (vEnd)
+Standalone<RangeResultRef> atStorageVersion =
-readBegin = std::max( readBegin, vEnd->isClearTo() ? vEnd->getEndKey() : vEnd.key() );
+wait(data->storage.readRange(KeyRangeRef(readBegin, readEnd), limit, *pLimitBytes));

-Standalone<VectorRef<KeyValueRef>> atStorageVersion = wait( data->storage.readRange( KeyRangeRef(readBegin, readEnd), limit ) );
+ASSERT(atStorageVersion.size() <= -limit);
 if (data->storageVersion() > version) throw transaction_too_old();

 int prevSize = result.data.size();
-merge( result.arena, result.data, atStorageVersion, vStart, vEnd, vCount, limit, false, *pLimitBytes );
+merge(result.arena, result.data, atStorageVersion, vStart, vEnd, vCount, limit, atStorageVersion.more, *pLimitBytes);
 limit += result.data.size() - prevSize;

 for (auto i = result.data.begin() + prevSize; i != result.data.end(); i++)
 *pLimitBytes -= sizeof(KeyValueRef) + i->expectedSize();

-vStart = vEnd;
+if (limit >=0 || *pLimitBytes <= 0) {
-readEnd = readBegin;
+break;
+}

-if (vStart && vStart->isClearTo()) {
+if (atStorageVersion.more) {
-ASSERT( vStart.key() < readEnd );
+ASSERT(result.data.end()[-1].key == atStorageVersion.end()[-1].key);
-readEnd = vStart.key();
+readEnd = result.data.end()[-1].key;
+} else if (vEnd && vEnd->isClearTo()) {
+ASSERT(vStart == vEnd);
+ASSERT(vEnd.key() < readEnd)
+readEnd = vEnd.key();
 --vStart;
+} else {
+ASSERT(readBegin == range.begin);
+break;
 }
 }
 }

+// all but the last item are less than *pLimitBytes
+ASSERT(result.data.size() == 0 || *pLimitBytes + result.data.end()[-1].expectedSize() + sizeof(KeyValueRef) > 0);

 result.more = limit == 0 || *pLimitBytes<=0; // FIXME: Does this have to be exact?
 result.version = version;
 return result;
@@ -3036,8 +2995,8 @@ ACTOR Future<Void> applyByteSampleResult( StorageServer* data, IKeyValueStore* s
 state int totalKeys = 0;
 state int totalBytes = 0;
 loop {
-Standalone<VectorRef<KeyValueRef>> bs = wait( storage->readRange( KeyRangeRef(begin, end), SERVER_KNOBS->STORAGE_LIMIT_BYTES, SERVER_KNOBS->STORAGE_LIMIT_BYTES ) );
+Standalone<RangeResultRef> bs = wait( storage->readRange( KeyRangeRef(begin, end), SERVER_KNOBS->STORAGE_LIMIT_BYTES, SERVER_KNOBS->STORAGE_LIMIT_BYTES ) );
-if(results) results->push_back(bs);
+if(results) results->push_back(bs.castTo<VectorRef<KeyValueRef>>());
 int rangeSize = bs.expectedSize();
 totalFetches++;
 totalKeys += bs.size();
@@ -3118,8 +3077,8 @@ ACTOR Future<bool> restoreDurableState( StorageServer* data, IKeyValueStore* sto
 state Future<Optional<Value>> fVersion = storage->readValue(persistVersion);
 state Future<Optional<Value>> fLogProtocol = storage->readValue(persistLogProtocol);
 state Future<Optional<Value>> fPrimaryLocality = storage->readValue(persistPrimaryLocality);
-state Future<Standalone<VectorRef<KeyValueRef>>> fShardAssigned = storage->readRange(persistShardAssignedKeys);
+state Future<Standalone<RangeResultRef>> fShardAssigned = storage->readRange(persistShardAssignedKeys);
-state Future<Standalone<VectorRef<KeyValueRef>>> fShardAvailable = storage->readRange(persistShardAvailableKeys);
+state Future<Standalone<RangeResultRef>> fShardAvailable = storage->readRange(persistShardAvailableKeys);

 state Promise<Void> byteSampleSampleRecovered;
 state Promise<Void> startByteSampleRestore;

@@ -3156,7 +3115,7 @@ ACTOR Future<bool> restoreDurableState( StorageServer* data, IKeyValueStore* sto
 debug_checkRestoredVersion( data->thisServerID, version, "StorageServer" );
 data->setInitialVersion( version );

-state Standalone<VectorRef<KeyValueRef>> available = fShardAvailable.get();
+state Standalone<RangeResultRef> available = fShardAvailable.get();
 state int availableLoc;
 for(availableLoc=0; availableLoc<available.size(); availableLoc++) {
 KeyRangeRef keys(

@@ -3170,7 +3129,7 @@ ACTOR Future<bool> restoreDurableState( StorageServer* data, IKeyValueStore* sto
 wait(yield());
 }

-state Standalone<VectorRef<KeyValueRef>> assigned = fShardAssigned.get();
+state Standalone<RangeResultRef> assigned = fShardAssigned.get();
 state int assignedLoc;
 for(assignedLoc=0; assignedLoc<assigned.size(); assignedLoc++) {
 KeyRangeRef keys(
@@ -3361,6 +3320,7 @@ ACTOR Future<Void> waitMetrics( StorageServerMetrics* self, WaitMetricsRequest r

 if( timedout ) {
 TEST( true ); // ShardWaitMetrics return on timeout
+//FIXME: instead of using random chance, send wrong_shard_server when the call in from waitMetricsMultiple (requires additional information in the request)
 if(deterministicRandom()->random01() < SERVER_KNOBS->WAIT_METRICS_WRONG_SHARD_CHANCE) {
 req.reply.sendError( wrong_shard_server() );
 } else {
@@ -618,7 +618,7 @@ Standalone<StringRef> roleString(std::set<std::pair<std::string, std::string>> r
 return StringRef(result);
 }

-void startRole(const Role &role, UID roleId, UID workerId, std::map<std::string, std::string> details, std::string origination) {
+void startRole(const Role &role, UID roleId, UID workerId, const std::map<std::string, std::string> &details, const std::string &origination) {
 if(role.includeInTraceRoles) {
 addTraceRole(role.abbreviation);
 }

@@ -921,7 +921,7 @@ ACTOR Future<Void> workerServer(
 auto& logData = sharedLogs[std::make_tuple(s.tLogOptions.version, s.storeType, s.tLogOptions.spillType)];
 // FIXME: Shouldn't if logData.first isValid && !isReady, shouldn't we
 // be sending a fake InitializeTLogRequest rather than calling tLog() ?
-Future<Void> tl = tLogFn( kv, queue, dbInfo, locality, !logData.actor.isValid() || logData.actor.isReady() ? logData.requests : PromiseStream<InitializeTLogRequest>(), s.storeID, true, oldLog, recovery, folder, degraded, activeSharedTLog );
+Future<Void> tl = tLogFn( kv, queue, dbInfo, locality, !logData.actor.isValid() || logData.actor.isReady() ? logData.requests : PromiseStream<InitializeTLogRequest>(), s.storeID, interf.id(), true, oldLog, recovery, folder, degraded, activeSharedTLog );
 recoveries.push_back(recovery.getFuture());
 activeSharedTLog->set(s.storeID);

@@ -1087,7 +1087,7 @@ ACTOR Future<Void> workerServer(
 filesClosed.add( data->onClosed() );
 filesClosed.add( queue->onClosed() );

-Future<Void> tLogCore = tLogFn( data, queue, dbInfo, locality, logData.requests, logId, false, Promise<Void>(), Promise<Void>(), folder, degraded, activeSharedTLog );
+Future<Void> tLogCore = tLogFn( data, queue, dbInfo, locality, logData.requests, logId, interf.id(), false, Promise<Void>(), Promise<Void>(), folder, degraded, activeSharedTLog );
 tLogCore = handleIOErrors( tLogCore, data, logId );
 tLogCore = handleIOErrors( tLogCore, queue, logId );
 errorForwarders.add( forwardError( errors, Role::SHARED_TRANSACTION_LOG, logId, tLogCore ) );
@@ -1383,37 +1383,46 @@ ACTOR Future<UID> createAndLockProcessIdFile(std::string folder) {
 state UID processIDUid;
 platform::createDirectory(folder);

-try {
+loop {
-state std::string lockFilePath = joinPath(folder, "processId");
+try {
-state ErrorOr<Reference<IAsyncFile>> lockFile = wait(errorOr(IAsyncFileSystem::filesystem(g_network)->open(lockFilePath, IAsyncFile::OPEN_READWRITE | IAsyncFile::OPEN_LOCK, 0600)));
+state std::string lockFilePath = joinPath(folder, "processId");
+state ErrorOr<Reference<IAsyncFile>> lockFile = wait(errorOr(IAsyncFileSystem::filesystem(g_network)->open(lockFilePath, IAsyncFile::OPEN_READWRITE | IAsyncFile::OPEN_LOCK, 0600)));

 if (lockFile.isError() && lockFile.getError().code() == error_code_file_not_found && !fileExists(lockFilePath)) {
 Reference<IAsyncFile> _lockFile = wait(IAsyncFileSystem::filesystem()->open(lockFilePath, IAsyncFile::OPEN_ATOMIC_WRITE_AND_CREATE | IAsyncFile::OPEN_CREATE | IAsyncFile::OPEN_LOCK | IAsyncFile::OPEN_READWRITE, 0600));
 lockFile = _lockFile;
 processIDUid = deterministicRandom()->randomUniqueID();
 BinaryWriter wr(IncludeVersion());
 wr << processIDUid;
 wait(lockFile.get()->write(wr.getData(), wr.getLength(), 0));
 wait(lockFile.get()->sync());
 }
 else {
 if (lockFile.isError()) throw lockFile.getError(); // If we've failed to open the file, throw an exception

 int64_t fileSize = wait(lockFile.get()->size());
 state Key fileData = makeString(fileSize);
 wait(success(lockFile.get()->read(mutateString(fileData), fileSize, 0)));
 processIDUid = BinaryReader::fromStringRef<UID>(fileData, IncludeVersion());
+}
+return processIDUid;
 }
-}
+catch (Error& e) {
-catch (Error& e) {
+if (e.code() == error_code_actor_cancelled) {
-if (e.code() != error_code_actor_cancelled) {
+throw;
-if (!e.isInjectedFault())
+}
+if (!e.isInjectedFault()) {
 fprintf(stderr, "ERROR: error creating or opening process id file `%s'.\n", joinPath(folder, "processId").c_str());
+}
 TraceEvent(SevError, "OpenProcessIdError").error(e);

+if(!g_network->isSimulated()) {
+throw;
+}

+deleteFile(lockFilePath);
 }
-throw;
 }
-return processIDUid;
 }

 ACTOR Future<Void> fdbd(
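The restructuring of createAndLockProcessIdFile() above moves the whole open/read/write sequence inside a retry loop: an actor cancellation is rethrown immediately, a real (non-simulated) failure still propagates, but in simulation an injected I/O fault deletes the possibly half-written processId file and tries again. A rough, self-contained model of that retry shape is below; SimulatedIOError, isSimulated and the random fault injection are stand-ins invented for the sketch, not FDB APIs.

#include <cstdio>
#include <random>
#include <stdexcept>
#include <string>

// Simplified model of the restructured actor: only a simulated (injected)
// failure leads to deleting the file and retrying.
struct SimulatedIOError : std::runtime_error { using std::runtime_error::runtime_error; };

static bool isSimulated = true;  // stand-in for g_network->isSimulated()

std::string readOrCreateProcessId(const std::string& lockFilePath) {
    std::mt19937 rng(0);
    while (true) {
        try {
            // Imagine the open/read/write of the processId file here; in simulation
            // the I/O layer may throw an injected error at any point.
            if (rng() % 2 == 0) throw SimulatedIOError("injected fault");
            return "processId-uid";
        } catch (const SimulatedIOError& e) {
            std::fprintf(stderr, "error creating or opening process id file `%s'\n",
                         lockFilePath.c_str());
            if (!isSimulated) throw;             // real deployments still fail hard
            std::remove(lockFilePath.c_str());   // drop the suspect file and retry
        }
    }
}

int main() { return readOrCreateProcessId("/tmp/processId").empty(); }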
@@ -46,7 +46,7 @@ struct DDMetricsExcludeWorkload : TestWorkload {

 ACTOR static Future<double> getMovingDataAmount(Database cx, DDMetricsExcludeWorkload* self) {
 try {
-StatusObject statusObj = wait(StatusClient::statusFetcher(cx->getConnectionFile()));
+StatusObject statusObj = wait(StatusClient::statusFetcher(cx));
 StatusObjectReader statusObjCluster;
 ((StatusObjectReader)statusObj).get("cluster", statusObjCluster);
 StatusObjectReader statusObjData;
@@ -270,7 +270,7 @@ ACTOR Future<Void> testKVStoreMain( KVStoreTestWorkload* workload, KVTest* ptest
 state Key k;
 state double cst = timer();
 while (true) {
-Standalone<VectorRef<KeyValueRef>> kv = wait( test.store->readRange( KeyRangeRef(k, LiteralStringRef("\xff\xff\xff\xff")), 1000 ) );
+Standalone<RangeResultRef> kv = wait( test.store->readRange( KeyRangeRef(k, LiteralStringRef("\xff\xff\xff\xff")), 1000 ) );
 count += kv.size();
 if (kv.size() < 1000) break;
 k = keyAfter( kv[ kv.size()-1 ].key );
@@ -69,7 +69,7 @@ struct StatusWorkload : TestWorkload {
 if (clientId != 0)
 return Void();

-return success(timeout(fetcher(cx->getConnectionFile(), this), testDuration));
+return success(timeout(fetcher(cx, this), testDuration));
 }
 virtual Future<bool> check(Database const& cx) {
 return errors.getValue() == 0;

@@ -161,7 +161,7 @@ struct StatusWorkload : TestWorkload {
 }
 }

-ACTOR Future<Void> fetcher(Reference<ClusterConnectionFile> connFile, StatusWorkload *self) {
+ACTOR Future<Void> fetcher(Database cx, StatusWorkload *self) {
 state double lastTime = now();

 loop{

@@ -170,7 +170,7 @@ struct StatusWorkload : TestWorkload {
 // Since we count the requests that start, we could potentially never really hear back?
 ++self->requests;
 state double issued = now();
-StatusObject result = wait(StatusClient::statusFetcher(connFile));
+StatusObject result = wait(StatusClient::statusFetcher(cx));
 ++self->replies;
 BinaryWriter br(AssumeVersion(currentProtocolVersion));
 save(br, result);
@@ -519,6 +519,10 @@ public:
 }
 #endif

+template <class U> Standalone<U> castTo() const {
+return Standalone<U>(*this, arena());
+}

 template <class Archive>
 void serialize(Archive& ar) {
 // FIXME: something like BinaryReader(ar) >> arena >> *(T*)this; to guarantee standalone arena???
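The new castTo<U>() above rewraps the same arena around a different view type, which is how the applyByteSampleResult hunk earlier can hand a Standalone<RangeResultRef> to code still expecting Standalone<VectorRef<KeyValueRef>> without copying the rows. The sketch below models that shared-ownership idea with a shared_ptr standing in for the arena; Arena, Standalone, VectorView and RangeView here are simplified stand-ins, not the real FDB classes.

#include <iostream>
#include <memory>
#include <string>
#include <vector>

// Minimal stand-in for the Arena/Standalone pair: a payload view plus shared
// ownership of the memory backing it. castTo<U>() keeps the same "arena" alive
// while reinterpreting the payload as a related view type.
struct Arena { std::vector<std::string> storage; };

template <class T>
struct Standalone : T {
    std::shared_ptr<Arena> arena = std::make_shared<Arena>();
    Standalone() = default;
    template <class V>
    Standalone(const V& value, std::shared_ptr<Arena> a) : T(value), arena(std::move(a)) {}

    template <class U>
    Standalone<U> castTo() const { return Standalone<U>(*this, arena); }
};

struct VectorView { std::vector<const std::string*> rows; };
struct RangeView : VectorView { bool more = false; };  // adds a flag, same row data

int main() {
    Standalone<RangeView> r;
    r.arena->storage = {"a", "b"};
    for (auto& s : r.arena->storage) r.rows.push_back(&s);
    r.more = true;

    // Same rows, same backing storage, narrower view type.
    Standalone<VectorView> v = r.castTo<VectorView>();
    std::cout << v.rows.size() << " rows, arena shared: "
              << (v.arena == r.arena) << "\n";  // 2 rows, arena shared: 1
}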
@@ -105,6 +105,7 @@ FlowKnobs::FlowKnobs(bool randomize, bool isSimulated) {

 //GenericActors
 init( BUGGIFY_FLOW_LOCK_RELEASE_DELAY, 1.0 );
+init( LOW_PRIORITY_DELAY_COUNT, 5 );

 //IAsyncFile
 init( INCREMENTAL_DELETE_TRUNCATE_AMOUNT, 5e8 ); //500MB
@@ -125,6 +125,7 @@ public:

 //GenericActors
 double BUGGIFY_FLOW_LOCK_RELEASE_DELAY;
+int LOW_PRIORITY_DELAY_COUNT;

 //IAsyncFile
 int64_t INCREMENTAL_DELETE_TRUNCATE_AMOUNT;
@@ -122,6 +122,7 @@ public:

 // INetwork interface
 virtual double now() { return currentTime; };
+virtual double timer() { return ::timer(); };
 virtual Future<Void> delay( double seconds, TaskPriority taskId );
 virtual Future<class Void> yield( TaskPriority taskID );
 virtual bool check_yield(TaskPriority taskId);
@@ -83,3 +83,12 @@ ACTOR Future<bool> quorumEqualsTrue( std::vector<Future<bool>> futures, int requ
 }
 }
 }

+ACTOR Future<Void> lowPriorityDelay( double waitTime ) {
+state int loopCount = 0;
+while(loopCount < FLOW_KNOBS->LOW_PRIORITY_DELAY_COUNT) {
+wait(delay(waitTime/FLOW_KNOBS->LOW_PRIORITY_DELAY_COUNT, TaskPriority::Low));
+loopCount++;
+}
+return Void();
+}
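The new lowPriorityDelay() actor above splits one wait into FLOW_KNOBS->LOW_PRIORITY_DELAY_COUNT shorter delays at TaskPriority::Low, so on a busy process, where low-priority tasks are serviced last, the effective wait stretches out instead of completing on time. The standalone sketch below only illustrates the slicing arithmetic with plain sleeps, since it has no flow run loop; the constant and function names are illustrative.

#include <chrono>
#include <iostream>
#include <thread>

// Illustrative stand-in for FLOW_KNOBS->LOW_PRIORITY_DELAY_COUNT; the real actor
// yields to the flow run loop at TaskPriority::Low for each slice, so a slice
// only completes once higher-priority work has drained.
static const int LOW_PRIORITY_DELAY_COUNT = 5;

void lowPriorityDelay(double waitTime) {
    for (int i = 0; i < LOW_PRIORITY_DELAY_COUNT; ++i) {
        // One slice of the total wait; in flow this is
        // wait(delay(waitTime / COUNT, TaskPriority::Low)).
        std::this_thread::sleep_for(
            std::chrono::duration<double>(waitTime / LOW_PRIORITY_DELAY_COUNT));
    }
}

int main() {
    auto t0 = std::chrono::steady_clock::now();
    lowPriorityDelay(0.5);
    std::chrono::duration<double> elapsed = std::chrono::steady_clock::now() - t0;
    std::cout << "waited ~" << elapsed.count() << "s in "
              << LOW_PRIORITY_DELAY_COUNT << " low-priority slices\n";
}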
@@ -823,6 +823,7 @@ Future<Void> anyTrue( std::vector<Reference<AsyncVar<bool>>> const& input, Refer
 Future<Void> cancelOnly( std::vector<Future<Void>> const& futures );
 Future<Void> timeoutWarningCollector( FutureStream<Void> const& input, double const& logDelay, const char* const& context, UID const& id );
 Future<bool> quorumEqualsTrue( std::vector<Future<bool>> const& futures, int const& required );
+Future<Void> lowPriorityDelay( double const& waitTime );

 ACTOR template <class T>
 Future<Void> streamHelper( PromiseStream<T> output, PromiseStream<Error> errors, Future<T> input ) {
@@ -59,12 +59,11 @@ enum class TaskPriority {
 TLogCommitReply = 8580,
 TLogCommit = 8570,
 ProxyGetRawCommittedVersion = 8565,
-ProxyCommitYield3 = 8562,
-ProxyTLogCommitReply = 8560,
+ProxyMasterVersionReply = 8560,
 ProxyCommitYield2 = 8557,
-ProxyResolverReply = 8555,
-ProxyMasterVersionReply = 8550,
-ProxyCommitYield1 = 8547,
+ProxyTLogCommitReply = 8555,
+ProxyCommitYield1 = 8550,
+ProxyResolverReply = 8547,
 ProxyCommit = 8545,
 ProxyCommitBatcher = 8540,
 TLogConfirmRunningReply = 8530,
@@ -420,6 +419,10 @@ public:
 // Provides a clock that advances at a similar rate on all connected endpoints
 // FIXME: Return a fixed point Time class

+virtual double timer() = 0;
+// A wrapper for directly getting the system time. The time returned by now() only updates in the run loop,
+// so it cannot be used to measure times of functions that do not have wait statements.

 virtual Future<class Void> delay( double seconds, TaskPriority taskID ) = 0;
 // The given future will be set after seconds have elapsed
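Per the comment added with the new timer() method above, now() only advances when the run loop processes a task (and tracks virtual time in simulation), so it cannot time work that happens between waits; timer() is a direct system-clock read. A small self-contained illustration of the timer() idea, using the standard clock rather than the flow runtime:

#include <chrono>
#include <iostream>
#include <thread>

// Stand-in for the new INetwork::timer(): a direct system-clock read, unlike
// now(), which in flow only updates in the run loop. Useful for measuring code
// that has no wait statements.
double timer() {
    using namespace std::chrono;
    return duration<double>(steady_clock::now().time_since_epoch()).count();
}

int main() {
    double start = timer();
    std::this_thread::sleep_for(std::chrono::milliseconds(50));  // some blocking work
    std::cout << "elapsed " << (timer() - start) << "s\n";       // ~0.05s
}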
@@ -282,7 +282,7 @@ struct _IncludeVersion {
 ar >> v;
 if (!v.isValid()) {
 auto err = incompatible_protocol_version();
-TraceEvent(SevError, "InvalidSerializationVersion").error(err).detailf("Version", "%llx", v);
+TraceEvent(SevWarnAlways, "InvalidSerializationVersion").error(err).detailf("Version", "%llx", v);
 throw err;
 }
 if (v > currentProtocolVersion) {
@@ -19,6 +19,10 @@ Requires: foundationdb-clients = %{version}-%{release}
 Conflicts: foundationdb < 0.1.4
 ifdef(`RHEL6', `Requires(post): chkconfig >= 0.9, /sbin/service')
 Requires(pre): /usr/sbin/useradd, /usr/sbin/groupadd, /usr/bin/getent
+# This is a heavy hammer, to remove /usr/bin/python as a dependency,
+# as it also removes dependencies like glibc. However, none of the
+# other strategies (__requires_exclude) seem to work.
+AutoReq: 0

 %package clients
 Summary: FoundationDB clients and library