Merge branch 'release-6.1'

# Conflicts:
#	documentation/sphinx/source/release-notes.rst
#	versions.target
Evan Tschannen 2019-04-08 18:38:42 -07:00
commit 21c0ba555c
32 changed files with 169 additions and 64 deletions


@@ -77,7 +77,9 @@ fdb_bool_t fdb_error_predicate( int predicate_test, fdb_error_t code ) {
 		return code == error_code_not_committed ||
 				code == error_code_transaction_too_old ||
 				code == error_code_future_version ||
-				code == error_code_database_locked;
+				code == error_code_database_locked ||
+				code == error_code_proxy_memory_limit_exceeded ||
+				code == error_code_process_behind;
 	}
 	return false;
 }
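
For context, this predicate is what a C-API client consults when deciding whether an error is worth retrying, so the two added codes become retryable from the bindings' point of view. A minimal sketch of that retry loop follows; it assumes the standard fdb_c entry points and the generated FDB_ERROR_PREDICATE_RETRYABLE constant, while do_work() is a hypothetical application callback.

// Hedged sketch of a C-API retry loop that now also retries
// proxy_memory_limit_exceeded and process_behind.
fdb_error_t run_with_retries(FDBTransaction* tr) {
    while (1) {
        fdb_error_t err = do_work(tr);        // hypothetical: issue reads/writes and commit
        if (!err)
            return 0;                         // committed successfully
        if (!fdb_error_predicate(FDB_ERROR_PREDICATE_RETRYABLE, err))
            return err;                       // not retryable: surface to the caller
        FDBFuture* f = fdb_transaction_on_error(tr, err);
        fdb_future_block_until_ready(f);      // back off before retrying with the same handle
        fdb_error_t reset_err = fdb_future_get_error(f);
        fdb_future_destroy(f);
        if (reset_err)
            return reset_err;
    }
}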


@@ -6,11 +6,11 @@ if(WIN32)
   target_compile_options(coveragetool PRIVATE "/langversion:6")
   set_property(TARGET coveragetool PROPERTY VS_DOTNET_REFERENCES
     "System"
-    "ystem.Core"
+    "System.Core"
     "System.Xml.Linq"
-    "ystem.Data.DataSetExtensions"
+    "System.Data.DataSetExtensions"
     "Microsoft.CSharp"
-    "ystem.Data"
+    "System.Data"
     "System.Xml")
 else()
   set(COVERAGETOOL_COMPILER_REFERENCES


@@ -661,6 +661,11 @@ You can now remove old client library versions from your clients. This is only t
 
 Version-specific notes on upgrading
 ===================================
 
+Upgrading from 6.1.x
+--------------------
+
+Upgrades from 6.1.x will keep all your old data and configuration settings.
+
 Upgrading from 6.0.x
 --------------------


@@ -32,12 +32,16 @@ FoundationDB may return the following error codes from API functions. If you nee
 +-----------------------------------------------+-----+--------------------------------------------------------------------------------+
 | accessed_unreadable                           | 1036| Read or wrote an unreadable key                                                |
 +-----------------------------------------------+-----+--------------------------------------------------------------------------------+
+| process_behind                                | 1037| Storage process does not have recent mutations                                 |
++-----------------------------------------------+-----+--------------------------------------------------------------------------------+
 | database_locked                               | 1038| Database is locked                                                             |
 +-----------------------------------------------+-----+--------------------------------------------------------------------------------+
 | cluster_version_changed                       | 1039| Cluster has been upgraded to a new protocol version                            |
 +-----------------------------------------------+-----+--------------------------------------------------------------------------------+
 | external_client_already_loaded                | 1040| External client has already been loaded                                        |
 +-----------------------------------------------+-----+--------------------------------------------------------------------------------+
+| proxy_memory_limit_exceeded                   | 1042| Proxy commit memory limit exceeded                                             |
++-----------------------------------------------+-----+--------------------------------------------------------------------------------+
 | operation_cancelled                           | 1101| Asynchronous operation cancelled                                               |
 +-----------------------------------------------+-----+--------------------------------------------------------------------------------+
 | future_released                               | 1102| Future has been released                                                       |


@@ -10,38 +10,38 @@ macOS
 
 The macOS installation package is supported on macOS 10.7+. It includes the client and (optionally) the server.
 
-* `FoundationDB-6.1.0.pkg <https://www.foundationdb.org/downloads/6.1.0/macOS/installers/FoundationDB-6.1.0.pkg>`_
+* `FoundationDB-6.1.1.pkg <https://www.foundationdb.org/downloads/6.1.1/macOS/installers/FoundationDB-6.1.1.pkg>`_
 
 Ubuntu
 ------
 
 The Ubuntu packages are supported on 64-bit Ubuntu 12.04+, but beware of the Linux kernel bug in Ubuntu 12.x.
 
-* `foundationdb-clients-6.1.0-1_amd64.deb <https://www.foundationdb.org/downloads/6.1.0/ubuntu/installers/foundationdb-clients_6.1.0-1_amd64.deb>`_
-* `foundationdb-server-6.1.0-1_amd64.deb <https://www.foundationdb.org/downloads/6.1.0/ubuntu/installers/foundationdb-server_6.1.0-1_amd64.deb>`_ (depends on the clients package)
+* `foundationdb-clients-6.1.1-1_amd64.deb <https://www.foundationdb.org/downloads/6.1.1/ubuntu/installers/foundationdb-clients_6.1.1-1_amd64.deb>`_
+* `foundationdb-server-6.1.1-1_amd64.deb <https://www.foundationdb.org/downloads/6.1.1/ubuntu/installers/foundationdb-server_6.1.1-1_amd64.deb>`_ (depends on the clients package)
 
 RHEL/CentOS EL6
 ---------------
 
 The RHEL/CentOS EL6 packages are supported on 64-bit RHEL/CentOS 6.x.
 
-* `foundationdb-clients-6.1.0-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.0/rhel6/installers/foundationdb-clients-6.1.0-1.el6.x86_64.rpm>`_
-* `foundationdb-server-6.1.0-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.0/rhel6/installers/foundationdb-server-6.1.0-1.el6.x86_64.rpm>`_ (depends on the clients package)
+* `foundationdb-clients-6.1.1-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.1/rhel6/installers/foundationdb-clients-6.1.1-1.el6.x86_64.rpm>`_
+* `foundationdb-server-6.1.1-1.el6.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.1/rhel6/installers/foundationdb-server-6.1.1-1.el6.x86_64.rpm>`_ (depends on the clients package)
 
 RHEL/CentOS EL7
 ---------------
 
 The RHEL/CentOS EL7 packages are supported on 64-bit RHEL/CentOS 7.x.
 
-* `foundationdb-clients-6.1.0-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.0/rhel7/installers/foundationdb-clients-6.1.0-1.el7.x86_64.rpm>`_
-* `foundationdb-server-6.1.0-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.0/rhel7/installers/foundationdb-server-6.1.0-1.el7.x86_64.rpm>`_ (depends on the clients package)
+* `foundationdb-clients-6.1.1-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.1/rhel7/installers/foundationdb-clients-6.1.1-1.el7.x86_64.rpm>`_
+* `foundationdb-server-6.1.1-1.el7.x86_64.rpm <https://www.foundationdb.org/downloads/6.1.1/rhel7/installers/foundationdb-server-6.1.1-1.el7.x86_64.rpm>`_ (depends on the clients package)
 
 Windows
 -------
 
 The Windows installer is supported on 64-bit Windows XP and later. It includes the client and (optionally) the server.
 
-* `foundationdb-6.1.0-x64.msi <https://www.foundationdb.org/downloads/6.1.0/windows/installers/foundationdb-6.1.0-x64.msi>`_
+* `foundationdb-6.1.1-x64.msi <https://www.foundationdb.org/downloads/6.1.1/windows/installers/foundationdb-6.1.1-x64.msi>`_
 
 API Language Bindings
 =====================
@@ -58,18 +58,18 @@ On macOS and Windows, the FoundationDB Python API bindings are installed as part
 
 If you need to use the FoundationDB Python API from other Python installations or paths, download the Python package:
 
-* `foundationdb-6.1.0.tar.gz <https://www.foundationdb.org/downloads/6.1.0/bindings/python/foundationdb-6.1.0.tar.gz>`_
+* `foundationdb-6.1.1.tar.gz <https://www.foundationdb.org/downloads/6.1.1/bindings/python/foundationdb-6.1.1.tar.gz>`_
 
 Ruby 1.9.3/2.0.0+
 -----------------
 
-* `fdb-6.1.0.gem <https://www.foundationdb.org/downloads/6.1.0/bindings/ruby/fdb-6.1.0.gem>`_
+* `fdb-6.1.1.gem <https://www.foundationdb.org/downloads/6.1.1/bindings/ruby/fdb-6.1.1.gem>`_
 
 Java 8+
 -------
 
-* `fdb-java-6.1.0.jar <https://www.foundationdb.org/downloads/6.1.0/bindings/java/fdb-java-6.1.0.jar>`_
-* `fdb-java-6.1.0-javadoc.jar <https://www.foundationdb.org/downloads/6.1.0/bindings/java/fdb-java-6.1.0-javadoc.jar>`_
+* `fdb-java-6.1.1.jar <https://www.foundationdb.org/downloads/6.1.1/bindings/java/fdb-java-6.1.1.jar>`_
+* `fdb-java-6.1.1-javadoc.jar <https://www.foundationdb.org/downloads/6.1.1/bindings/java/fdb-java-6.1.1-javadoc.jar>`_
 
 Go 1.1+
 -------


@@ -434,7 +434,8 @@
             "triple",
             "three_datacenter",
             "three_datacenter_fallback",
-            "three_data_hall"
+            "three_data_hall",
+            "three_data_hall_fallback"
         ]},
         "regions":[{
             "datacenters":[{


@@ -448,9 +448,14 @@ ACTOR Future<Void> readCommitted(Database cx, PromiseStream<RangeResultWithVersi
 			}
 		}
 		catch (Error &e) {
-			if (e.code() != error_code_transaction_too_old && e.code() != error_code_future_version)
-				throw;
-			tr = Transaction(cx);
+			if (e.code() == error_code_transaction_too_old) {
+				// We are using this transaction until it's too old and then resetting to a fresh one,
+				// so we don't need to delay.
+				tr.fullReset();
+			}
+			else {
+				wait(tr.onError(e));
+			}
 		}
 	}
 }
@@ -539,9 +544,14 @@ ACTOR Future<Void> readCommitted(Database cx, PromiseStream<RCGroup> results, Fu
 			nextKey = firstGreaterThan(rangevalue.end()[-1].key);
 		}
 		catch (Error &e) {
-			if (e.code() != error_code_transaction_too_old && e.code() != error_code_future_version)
-				throw;
-			wait(tr.onError(e));
+			if (e.code() == error_code_transaction_too_old) {
+				// We are using this transaction until it's too old and then resetting to a fresh one,
+				// so we don't need to delay.
+				tr.fullReset();
+			}
+			else {
+				wait(tr.onError(e));
+			}
 		}
 	}
 }
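
The two hunks above switch readCommitted() to a split retry strategy: reset the transaction immediately when its read version has simply aged out, and let onError() apply its normal backoff for everything else. A hedged, standalone sketch of that pattern follows; scanWithRetry and the row limit are illustrative, not part of the commit.

ACTOR Future<Void> scanWithRetry(Database cx, KeyRange range) {
	state Transaction tr(cx);
	loop {
		try {
			Standalone<RangeResultRef> res = wait(tr.getRange(range, 1000));
			// ... consume res here ...
			return Void();
		} catch (Error& e) {
			if (e.code() == error_code_transaction_too_old) {
				// Expected once the read version ages out: start over immediately, no delay.
				tr.fullReset();
			} else {
				// Everything else goes through the standard backoff/retry logic.
				wait(tr.onError(e));
			}
		}
	}
}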


@@ -239,6 +239,8 @@ StatusObject DatabaseConfiguration::toJSON(bool noPolicies) const {
 		result["redundancy_mode"] = "triple";
 	} else if( tLogReplicationFactor == 4 && storageTeamSize == 3 && tlogInfo == "data_hall^2 x zoneid^2 x 1" && storageInfo == "data_hall^3 x 1" ) {
 		result["redundancy_mode"] = "three_data_hall";
+	} else if( tLogReplicationFactor == 4 && storageTeamSize == 2 && tlogInfo == "data_hall^2 x zoneid^2 x 1" && storageInfo == "data_hall^2 x 1" ) {
+		result["redundancy_mode"] = "three_data_hall_fallback";
 	} else {
 		customRedundancy = true;
 	}


@@ -147,6 +147,7 @@ public:
 	int64_t transactionsNotCommitted;
 	int64_t transactionsMaybeCommitted;
 	int64_t transactionsResourceConstrained;
+	int64_t transactionsProcessBehind;
 	ContinuousSample<double> latencies, readLatencies, commitLatencies, GRVLatencies, mutationsPerCommit, bytesPerCommit;
 	int outstandingWatches;


@@ -169,7 +169,11 @@ ACTOR Future<Void> failureMonitorClientLoop(
 ACTOR Future<Void> failureMonitorClient( Reference<AsyncVar<Optional<struct ClusterInterface>>> ci, bool trackMyStatus ) {
 	state SimpleFailureMonitor* monitor = static_cast<SimpleFailureMonitor*>( &IFailureMonitor::failureMonitor() );
 	state Reference<FailureMonitorClientState> fmState = Reference<FailureMonitorClientState>(new FailureMonitorClientState());
+	auto localAddr = g_network->getLocalAddresses();
+	monitor->setStatus(localAddr.address, FailureStatus(false));
+	if(localAddr.secondaryAddress.present()) {
+		monitor->setStatus(localAddr.secondaryAddress.get(), FailureStatus(false));
+	}
 	loop {
 		state Future<Void> client = ci->get().present() ? failureMonitorClientLoop(monitor, ci->get().get(), fmState, trackMyStatus) : Void();
 		wait( ci->onChange() );


@@ -145,6 +145,13 @@ std::map<std::string, std::string> configForToken( std::string const& mode ) {
 		tLogPolicy = Reference<IReplicationPolicy>(new PolicyAcross(2, "data_hall",
 			Reference<IReplicationPolicy>(new PolicyAcross(2, "zoneid", Reference<IReplicationPolicy>(new PolicyOne())))
 		));
+	} else if(mode == "three_data_hall_fallback") {
+		redundancy="2";
+		log_replicas="4";
+		storagePolicy = Reference<IReplicationPolicy>(new PolicyAcross(2, "data_hall", Reference<IReplicationPolicy>(new PolicyOne())));
+		tLogPolicy = Reference<IReplicationPolicy>(new PolicyAcross(2, "data_hall",
+			Reference<IReplicationPolicy>(new PolicyAcross(2, "zoneid", Reference<IReplicationPolicy>(new PolicyOne())))
+		));
 	} else
 		redundancySpecified = false;
 	if (redundancySpecified) {
@@ -510,6 +517,12 @@ ConfigureAutoResult parseConfig( StatusObject const& status ) {
 		} else if( result.old_replication == "three_datacenter_fallback" ) {
 			storage_replication = 4;
 			log_replication = 4;
+		} else if( result.old_replication == "three_data_hall" ) {
+			storage_replication = 3;
+			log_replication = 4;
+		} else if( result.old_replication == "three_data_hall_fallback" ) {
+			storage_replication = 2;
+			log_replication = 4;
 		} else
 			return ConfigureAutoResult();


@@ -222,6 +222,7 @@ ACTOR Future<Void> databaseLogger( DatabaseContext *cx ) {
 			.detail("NotCommitted", cx->transactionsNotCommitted)
 			.detail("MaybeCommitted", cx->transactionsMaybeCommitted)
 			.detail("ResourceConstrained", cx->transactionsResourceConstrained)
+			.detail("ProcessBehind", cx->transactionsProcessBehind)
 			.detail("MeanLatency", cx->latencies.mean())
 			.detail("MedianLatency", cx->latencies.median())
 			.detail("Latency90", cx->latencies.percentile(0.90))
@@ -513,7 +514,7 @@ DatabaseContext::DatabaseContext(
 	lockAware(lockAware), apiVersion(apiVersion), provisional(false),
 	transactionReadVersions(0), transactionLogicalReads(0), transactionPhysicalReads(0), transactionCommittedMutations(0), transactionCommittedMutationBytes(0),
 	transactionsCommitStarted(0), transactionsCommitCompleted(0), transactionsTooOld(0), transactionsFutureVersions(0), transactionsNotCommitted(0),
-	transactionsMaybeCommitted(0), transactionsResourceConstrained(0), outstandingWatches(0), transactionTimeout(0.0), transactionMaxRetries(-1),
+	transactionsMaybeCommitted(0), transactionsResourceConstrained(0), transactionsProcessBehind(0), outstandingWatches(0), transactionTimeout(0.0), transactionMaxRetries(-1),
 	latencies(1000), readLatencies(1000), commitLatencies(1000), GRVLatencies(1000), mutationsPerCommit(1000), bytesPerCommit(1000), mvCacheInsertLocation(0),
 	healthMetricsLastUpdated(0), detailedHealthMetricsLastUpdated(0)
 {
@@ -1487,8 +1488,9 @@ ACTOR Future< Void > watchValue( Future<Version> version, Key key, Optional<Valu
 			if (e.code() == error_code_wrong_shard_server || e.code() == error_code_all_alternatives_failed) {
 				cx->invalidateCache( key );
 				wait(delay(CLIENT_KNOBS->WRONG_SHARD_SERVER_DELAY, info.taskID));
-			} else if( e.code() == error_code_watch_cancelled ) {
-				TEST( true ); // Too many watches on the storage server, poll for changes instead
+			} else if( e.code() == error_code_watch_cancelled || e.code() == error_code_process_behind ) {
+				TEST( e.code() == error_code_watch_cancelled ); // Too many watches on the storage server, poll for changes instead
+				TEST( e.code() == error_code_process_behind ); // The storage servers are all behind
 				wait(delay(CLIENT_KNOBS->WATCH_POLLING_TIME, info.taskID));
 			} else if ( e.code() == error_code_timed_out ) { //The storage server occasionally times out watches in case it was cancelled
 				TEST( true ); // A watch timed out
@@ -3028,7 +3030,8 @@ Future<Void> Transaction::onError( Error const& e ) {
 	if (e.code() == error_code_not_committed ||
 		e.code() == error_code_commit_unknown_result ||
 		e.code() == error_code_database_locked ||
-		e.code() == error_code_proxy_memory_limit_exceeded)
+		e.code() == error_code_proxy_memory_limit_exceeded ||
+		e.code() == error_code_process_behind)
 	{
 		if(e.code() == error_code_not_committed)
 			cx->transactionsNotCommitted++;
@@ -3036,6 +3039,8 @@ Future<Void> Transaction::onError( Error const& e ) {
 			cx->transactionsMaybeCommitted++;
 		if (e.code() == error_code_proxy_memory_limit_exceeded)
 			cx->transactionsResourceConstrained++;
+		if (e.code() == error_code_process_behind)
+			cx->transactionsProcessBehind++;
 
 		double backoff = getBackoff(e.code());
 		reset();
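
These onError() changes are what make process_behind behave like the other retryable errors inside the standard client retry idiom, sketched below. setWithRetry is a hypothetical example actor; Transaction, onError(), and commit() are the existing client API.

ACTOR Future<Void> setWithRetry(Database cx, Key key, Value value) {
	state Transaction tr(cx);
	loop {
		try {
			tr.set(key, value);
			wait(tr.commit());
			return Void();
		} catch (Error& e) {
			// With this change, process_behind is counted, backed off, and retried here
			// instead of being rethrown to the caller.
			wait(tr.onError(e));
		}
	}
}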


@@ -458,7 +458,8 @@ const KeyRef JSONSchemas::statusSchema = LiteralStringRef(R"statusSchema(
             "triple",
             "three_datacenter",
             "three_datacenter_fallback",
-            "three_data_hall"
+            "three_data_hall",
+            "three_data_hall_fallback"
         ]},
         "regions":[{
             "datacenters":[{
@@ -667,7 +668,8 @@ const KeyRef JSONSchemas::clusterConfigurationSchema = LiteralStringRef(R"config
             "triple",
             "three_datacenter",
             "three_datacenter_fallback",
-            "three_data_hall"
+            "three_data_hall",
+            "three_data_hall_fallback"
         ]},
         "regions":[{
             "datacenters":[{


@@ -138,6 +138,12 @@ FailureStatus SimpleFailureMonitor::getState( Endpoint const& endpoint ) {
 	}
 }
 
+FailureStatus SimpleFailureMonitor::getState( NetworkAddress const& address ) {
+	auto a = addressStatus.find(address);
+	if (a == addressStatus.end()) return FailureStatus();
+	else return a->second;
+}
+
 bool SimpleFailureMonitor::onlyEndpointFailed( Endpoint const& endpoint ) {
 	if(!endpointKnownFailed.get(endpoint))
 		return false;
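
A short hedged sketch of the new by-address query; peerLooksHealthy is an illustrative wrapper, while IFailureMonitor::failureMonitor() and FailureStatus::isAvailable() appear elsewhere in this commit.

// Query availability by raw NetworkAddress rather than by Endpoint.
bool peerLooksHealthy(NetworkAddress const& peer) {
	return IFailureMonitor::failureMonitor().getState(peer).isAvailable();
}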


@@ -84,6 +84,9 @@ public:
 	// Returns the currently known status for the endpoint
 	virtual FailureStatus getState( Endpoint const& endpoint ) = 0;
 
+	// Returns the currently known status for the address
+	virtual FailureStatus getState( NetworkAddress const& address ) = 0;
+
 	// Only use this function when the endpoint is known to be failed
 	virtual void endpointNotFound( Endpoint const& ) = 0;
@@ -130,6 +133,7 @@ public:
 	virtual Future<Void> onStateChanged( Endpoint const& endpoint );
 	virtual FailureStatus getState( Endpoint const& endpoint );
+	virtual FailureStatus getState( NetworkAddress const& address );
 	virtual Future<Void> onDisconnectOrFailure( Endpoint const& endpoint );
 	virtual bool onlyEndpointFailed( Endpoint const& endpoint );
 	virtual bool permanentlyFailed( Endpoint const& endpoint );


@@ -149,7 +149,9 @@ public:
 		lastIncompatibleMessage(0),
 		transportId(transportId),
 		numIncompatibleConnections(0)
-	{}
+	{
+		degraded = Reference<AsyncVar<bool>>( new AsyncVar<bool>(false) );
+	}
 
 	~TransportData();
@@ -170,6 +172,8 @@ public:
 	NetworkAddressList localAddresses;
 	std::vector<Future<Void>> listeners;
 	std::unordered_map<NetworkAddress, struct Peer*> peers;
+	std::unordered_map<NetworkAddress, std::pair<double, double>> closedPeers;
+	Reference<AsyncVar<bool>> degraded;
 	bool warnAlwaysForLargePacket;
 
 	// These declarations must be in exactly this order
@@ -483,6 +487,17 @@ struct Peer : NonCopyable {
 				TraceEvent(ok ? SevInfo : SevWarnAlways, "IncompatibleConnectionClosed", conn ? conn->getDebugID() : UID()).error(e, true).suppressFor(1.0).detail("PeerAddr", self->destination);
 			}
 
+			if(self->destination.isPublic() && IFailureMonitor::failureMonitor().getState(self->destination).isAvailable()) {
+				auto& it = self->transport->closedPeers[self->destination];
+				if(now() - it.second > FLOW_KNOBS->TOO_MANY_CONNECTIONS_CLOSED_RESET_DELAY) {
+					it.first = now();
+				} else if(now() - it.first > FLOW_KNOBS->TOO_MANY_CONNECTIONS_CLOSED_TIMEOUT) {
+					TraceEvent(SevWarnAlways, "TooManyConnectionsClosed", conn ? conn->getDebugID() : UID()).suppressFor(5.0).detail("PeerAddr", self->destination);
+					self->transport->degraded->set(true);
+				}
+				it.second = now();
+			}
+
 			if (conn) {
 				conn->close();
 				conn = Reference<IConnection>();
@@ -1100,6 +1115,10 @@ int FlowTransport::getEndpointCount() {
 	return -1;
 }
 
+Reference<AsyncVar<bool>> FlowTransport::getDegraded() {
+	return self->degraded;
+}
+
 bool FlowTransport::incompatibleOutgoingConnectionsPresent() {
 	return self->numIncompatibleConnections > 0;
 }
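
To show how the new degraded flag is meant to be consumed, here is a hedged sketch of an observer actor. monitorTransportDegraded is hypothetical; getDegraded() and the AsyncVar interface come from the hunks above.

ACTOR Future<Void> monitorTransportDegraded() {
	state Reference<AsyncVar<bool>> degraded = FlowTransport::transport().getDegraded();
	loop {
		if (degraded->get()) {
			// Repeated closes of connections to peers the failure monitor still considers
			// healthy have flipped the flag; surface it for operators.
			TraceEvent(SevWarnAlways, "TransportDegraded");
		}
		wait(degraded->onChange());
	}
}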


@@ -143,6 +143,9 @@ public:
 	// Makes PacketID "unreliable" (either the data or a connection close event will be delivered
 	// eventually). It can still be used safely to send a reply to a "reliable" request.
 
+	Reference<AsyncVar<bool>> getDegraded();
+	// This async var will be set to true when the process cannot connect to a public network address that the failure monitor thinks is healthy.
+
 	void sendUnreliable( ISerializeSource const& what, const Endpoint& destination, bool openConnection = true );// { cancelReliable(sendReliable(what,destination)); }
 
 	int getEndpointCount();


@@ -108,7 +108,7 @@ bool checkAndProcessResult(ErrorOr<T> result, Reference<ModelHolder> holder, boo
 	}
 
 	if(triedAllOptions && errCode == error_code_process_behind) {
-		throw future_version();
+		throw result.getError();
 	}
 
 	return false;


@@ -310,12 +310,18 @@ public:
 	// See IFailureMonitor::onFailedFor() for an explanation of the duration and slope parameters.
 	template <class X>
 	Future<ErrorOr<REPLY_TYPE(X)>> getReplyUnlessFailedFor(const X& value, double sustainedFailureDuration, double sustainedFailureSlope, int taskID) const {
-		return waitValueOrSignal(getReply(value, taskID), makeDependent<T>(IFailureMonitor::failureMonitor()).onFailedFor(getEndpoint(taskID), sustainedFailureDuration, sustainedFailureSlope), getEndpoint(taskID));
+		// If it is local endpoint, no need for failure monitoring
+		return waitValueOrSignal(getReply(value, taskID),
+				makeDependent<T>(IFailureMonitor::failureMonitor()).onFailedFor(getEndpoint(taskID), sustainedFailureDuration, sustainedFailureSlope),
+				getEndpoint(taskID));
 	}
 
 	template <class X>
 	Future<ErrorOr<REPLY_TYPE(X)>> getReplyUnlessFailedFor(const X& value, double sustainedFailureDuration, double sustainedFailureSlope) const {
-		return waitValueOrSignal(getReply(value), makeDependent<T>(IFailureMonitor::failureMonitor()).onFailedFor(getEndpoint(), sustainedFailureDuration, sustainedFailureSlope), getEndpoint());
+		// If it is local endpoint, no need for failure monitoring
+		return waitValueOrSignal(getReply(value),
+				makeDependent<T>(IFailureMonitor::failureMonitor()).onFailedFor(getEndpoint(), sustainedFailureDuration, sustainedFailureSlope),
+				getEndpoint());
 	}
 
 	template <class X>


@@ -75,7 +75,6 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
 	init( DISK_QUEUE_FILE_SHRINK_BYTES, 100<<20 ); // BUGGIFYd per file within the DiskQueue
 	init( TLOG_DEGRADED_DELAY_COUNT, 5 );
 	init( TLOG_DEGRADED_DURATION, 5.0 );
-	init( TLOG_DEGRADED_RESET_INTERVAL, 48*60*60 ); if ( randomize && BUGGIFY ) TLOG_DEGRADED_RESET_INTERVAL = 10;
 
 	// Data distribution queue
 	init( HEALTH_POLL_TIME, 1.0 );
@@ -417,6 +416,9 @@ ServerKnobs::ServerKnobs(bool randomize, ClientKnobs* clientKnobs) {
 	//Worker
 	init( WORKER_LOGGING_INTERVAL, 5.0 );
 	init( INCOMPATIBLE_PEER_DELAY_BEFORE_LOGGING, 5.0 );
+	init( DEGRADED_RESET_INTERVAL, 24*60*60 ); if ( randomize && BUGGIFY ) DEGRADED_RESET_INTERVAL = 10;
+	init( DEGRADED_WARNING_LIMIT, 1 );
+	init( DEGRADED_WARNING_RESET_DELAY, 7*24*60*60 );
 
 	// Test harness
 	init( WORKER_POLL_DELAY, 1.0 );


@@ -79,7 +79,6 @@ public:
 	int64_t DISK_QUEUE_FILE_SHRINK_BYTES; // When we shrink the disk queue, by how many bytes should it shrink?
 	int TLOG_DEGRADED_DELAY_COUNT;
 	double TLOG_DEGRADED_DURATION;
-	double TLOG_DEGRADED_RESET_INTERVAL;
 
 	// Data distribution queue
 	double HEALTH_POLL_TIME;
@@ -356,6 +355,9 @@ public:
 	//Worker
 	double WORKER_LOGGING_INTERVAL;
 	double INCOMPATIBLE_PEER_DELAY_BEFORE_LOGGING;
+	double DEGRADED_RESET_INTERVAL;
+	double DEGRADED_WARNING_LIMIT;
+	double DEGRADED_WARNING_RESET_DELAY;
 
 	// Test harness
 	double WORKER_POLL_DELAY;


@@ -1810,7 +1810,7 @@ ACTOR Future<Standalone<RangeResultRef>> tryGetRange( Database cx, Version versi
 			}
 		}
 	} catch( Error &e ) {
-		if( begin.getKey() != keys.begin && ( e.code() == error_code_transaction_too_old || e.code() == error_code_future_version ) ) {
+		if( begin.getKey() != keys.begin && ( e.code() == error_code_transaction_too_old || e.code() == error_code_future_version || e.code() == error_code_process_behind ) ) {
 			if( e.code() == error_code_transaction_too_old )
 				*isTooOld = true;
 			output.more = true;
@@ -2014,8 +2014,8 @@ ACTOR Future<Void> fetchKeys( StorageServer *data, AddingShard* shard ) {
 					debug_nextRetryToLog += std::min(debug_nextRetryToLog, 1024);
 					TraceEvent(SevWarn, "FetchPast", data->thisServerID).detail("TotalAttempts", debug_getRangeRetries).detail("FKID", interval.pairID).detail("V", lastFV).detail("N", fetchVersion).detail("E", data->version.get());
 				}
-			} else if (e.code() == error_code_future_version) {
-				TEST(true); // fetchKeys got future_version, so there must be a huge storage lag somewhere. Keep trying.
+			} else if (e.code() == error_code_future_version || e.code() == error_code_process_behind) {
+				TEST(true); // fetchKeys got future_version or process_behind, so there must be a huge storage lag somewhere. Keep trying.
 			} else {
 				throw;
 			}


@@ -625,7 +625,7 @@ ACTOR Future<Void> workerServer( Reference<ClusterConnectionFile> connFile, Refe
 	state WorkerCache<InitializeStorageReply> storageCache;
 	state Reference<AsyncVar<ServerDBInfo>> dbInfo( new AsyncVar<ServerDBInfo>(ServerDBInfo()) );
 	state Future<Void> metricsLogger;
-	state Reference<AsyncVar<bool>> degraded( new AsyncVar<bool>(false) );
+	state Reference<AsyncVar<bool>> degraded = FlowTransport::transport().getDegraded();
 	// tLogFnForOptions() can return a function that doesn't correspond with the FDB version that the
 	// TLogVersion represents.  This can be done if the newer TLog doesn't support a requested option.
 	// As (store type, spill type) can map to the same TLogFn across multiple TLogVersions, we need to
@@ -652,7 +652,7 @@ ACTOR Future<Void> workerServer( Reference<ClusterConnectionFile> connFile, Refe
 		}
 	}
 
-	errorForwarders.add( resetAfter(degraded, SERVER_KNOBS->TLOG_DEGRADED_RESET_INTERVAL, false));
+	errorForwarders.add( resetAfter(degraded, SERVER_KNOBS->DEGRADED_RESET_INTERVAL, false, SERVER_KNOBS->DEGRADED_WARNING_LIMIT, SERVER_KNOBS->DEGRADED_WARNING_RESET_DELAY, "DegradedReset"));
 	errorForwarders.add( loadedPonger( interf.debugPing.getFuture() ) );
 	errorForwarders.add( waitFailureServer( interf.waitFailure.getFuture() ) );
 	errorForwarders.add( monitorServerDBInfo( ccInterface, connFile, locality, dbInfo ) );


@@ -288,7 +288,8 @@ struct ConsistencyCheckWorkload : TestWorkload
 			}
 			catch(Error &e)
 			{
-				if(e.code() == error_code_transaction_too_old || e.code() == error_code_future_version || e.code() == error_code_wrong_shard_server || e.code() == error_code_all_alternatives_failed || e.code() == error_code_server_request_queue_full)
+				if (e.code() == error_code_transaction_too_old || e.code() == error_code_future_version ||
+					e.code() == error_code_wrong_shard_server || e.code() == error_code_all_alternatives_failed || e.code() == error_code_process_behind)
 					TraceEvent("ConsistencyCheck_Retry").error(e); // FIXME: consistency check does not retry in this case
 				else
 					self->testFailure(format("Error %d - %s", e.code(), e.name()));
@@ -387,6 +388,7 @@ struct ConsistencyCheckWorkload : TestWorkload
 		state Key beginKey = allKeys.begin.withPrefix(keyServersPrefix);
 		state Key endKey = allKeys.end.withPrefix(keyServersPrefix);
 		state int i = 0;
+		state Transaction onErrorTr(cx); // This transaction exists only to access onError and its backoff behavior
 
 		//If the responses are too big, we may use multiple requests to get the key locations. Each request begins where the last left off
 		for ( ; i < shards.size(); i++) {
@@ -466,11 +468,9 @@ struct ConsistencyCheckWorkload : TestWorkload
 					keyLocations.push_back_deep(keyLocations.arena(), currentLocations.end()[-1]);
 				}
 				catch (Error& e) {
-					//If we failed because of a version problem, then retry
-					if(e.code() == error_code_transaction_too_old || e.code() == error_code_future_version || e.code() == error_code_transaction_too_old)
-						TraceEvent("ConsistencyCheck_RetryGetKeyLocations").error(e);
-					else
-						throw;
+					state Error err = e;
+					wait(onErrorTr.onError(err));
+					TraceEvent("ConsistencyCheck_RetryGetKeyLocations").error(err);
 				}
 			}
 		}
@@ -713,6 +713,7 @@ struct ConsistencyCheckWorkload : TestWorkload
 				state int64_t totalReadAmount = 0;
 
 				state KeySelector begin = firstGreaterOrEqual(range.begin);
+				state Transaction onErrorTr(cx); // This transaction exists only to access onError and its backoff behavior
 
 				//Read a limited number of entries at a time, repeating until all keys in the shard have been read
 				loop
@@ -933,11 +934,9 @@ struct ConsistencyCheckWorkload : TestWorkload
 					}
 					catch(Error &e)
 					{
-						//If we failed because of a version problem, then retry
-						if(e.code() == error_code_transaction_too_old || e.code() == error_code_future_version || e.code() == error_code_transaction_too_old)
-							TraceEvent("ConsistencyCheck_RetryDataConsistency").error(e);
-						else
-							throw;
+						state Error err = e;
+						wait(onErrorTr.onError(err));
+						TraceEvent("ConsistencyCheck_RetryDataConsistency").error(err);
 					}
 				}
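
The consistency-check hunks above introduce a transaction whose only job is to supply onError()'s error classification and backoff for failures raised outside a normal transaction. A hedged generic sketch of that pattern follows; retryRawRead and readDirectlyFromStorage are illustrative names only.

ACTOR Future<Void> retryRawRead(Database cx) {
	// Never used to read or write; it only classifies errors and applies the
	// standard backoff via onError().
	state Transaction onErrorTr(cx);
	loop {
		try {
			wait(readDirectlyFromStorage(cx)); // hypothetical helper that may throw retryable errors
			return Void();
		} catch (Error& e) {
			state Error err = e;
			wait(onErrorTr.onError(err));
		}
	}
}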


@@ -58,7 +58,8 @@ struct ExceptionContract {
 			e.code() == error_code_future_version ||
 			e.code() == error_code_transaction_cancelled ||
 			e.code() == error_code_key_too_large ||
-			e.code() == error_code_value_too_large)
+			e.code() == error_code_value_too_large ||
+			e.code() == error_code_process_behind)
 		{
 			return;
 		}


@@ -64,6 +64,8 @@ FlowKnobs::FlowKnobs(bool randomize, bool isSimulated) {
 	init( RECONNECTION_TIME_GROWTH_RATE, 1.2 );
 	init( RECONNECTION_RESET_TIME, 5.0 );
 	init( CONNECTION_ACCEPT_DELAY, 0.01 );
+	init( TOO_MANY_CONNECTIONS_CLOSED_RESET_DELAY, 5.0 );
+	init( TOO_MANY_CONNECTIONS_CLOSED_TIMEOUT, 20.0 );
 
 	init( TLS_CERT_REFRESH_DELAY_SECONDS, 12*60*60 );


@@ -95,6 +95,8 @@ public:
 	int64_t BUGGIFY_SIM_PAGE_CACHE_64K;
 	int MAX_EVICT_ATTEMPTS;
 	double PAGE_CACHE_TRUNCATE_LOOKUP_FRACTION;
+	double TOO_MANY_CONNECTIONS_CLOSED_RESET_DELAY;
+	int TOO_MANY_CONNECTIONS_CLOSED_TIMEOUT;
 
 	//AsyncFileKAIO
 	int MAX_OUTSTANDING;


@@ -615,7 +615,7 @@ void getNetworkTraffic(const IPAddress& ip, uint64_t& bytesSent, uint64_t& bytes
 		snmp_stream >> retransSegs;
 }
 
-void getMachineLoad(uint64_t& idleTime, uint64_t& totalTime) {
+void getMachineLoad(uint64_t& idleTime, uint64_t& totalTime, bool logDetails) {
 	INJECT_FAULT( platform_error, "getMachineLoad" ); // Even though this function doesn't throw errors, the equivalents for other platforms do, and since all of our simulation testing is on Linux...
 	std::ifstream stat_stream("/proc/stat", std::ifstream::in);
@@ -628,7 +628,7 @@ void getMachineLoad(uint64_t& idleTime, uint64_t& totalTime) {
 	totalTime = t_user+t_nice+t_system+t_idle+t_iowait+t_irq+t_softirq+t_steal+t_guest;
 	idleTime = t_idle+t_iowait;
 
-	if( !DEBUG_DETERMINISM )
+	if( !DEBUG_DETERMINISM && logDetails )
 		TraceEvent("MachineLoadDetail").detail("User", t_user).detail("Nice", t_nice).detail("System", t_system).detail("Idle", t_idle).detail("IOWait", t_iowait).detail("IRQ", t_irq).detail("SoftIRQ", t_softirq).detail("Steal", t_steal).detail("Guest", t_guest);
 }
@@ -818,7 +818,7 @@ void getNetworkTraffic(const IPAddress& ip, uint64_t& bytesSent, uint64_t& bytes
 	free(buf);
 }
 
-void getMachineLoad(uint64_t& idleTime, uint64_t& totalTime) {
+void getMachineLoad(uint64_t& idleTime, uint64_t& totalTime, bool logDetails) {
 	INJECT_FAULT( platform_error, "getMachineLoad" );
 	mach_msg_type_number_t count = HOST_CPU_LOAD_INFO_COUNT;
 	host_cpu_load_info_data_t r_load;
@@ -1103,7 +1103,7 @@ void initPdhStrings(SystemStatisticsState *state, std::string dataFolder) {
 }
 #endif
 
-SystemStatistics getSystemStatistics(std::string dataFolder, const IPAddress* ip, SystemStatisticsState** statState) {
+SystemStatistics getSystemStatistics(std::string dataFolder, const IPAddress* ip, SystemStatisticsState** statState, bool logDetails) {
 	if( (*statState) == NULL )
 		(*statState) = new SystemStatisticsState();
 
 	SystemStatistics returnStats;
@@ -1238,7 +1238,7 @@ SystemStatistics getSystemStatistics(std::string dataFolder, const IPAddress* ip
 		uint64_t clockIdleTime = (*statState)->lastClockIdleTime;
 		uint64_t clockTotalTime = (*statState)->lastClockTotalTime;
 
-		getMachineLoad(clockIdleTime, clockTotalTime);
+		getMachineLoad(clockIdleTime, clockTotalTime, logDetails);
 		returnStats.machineCPUSeconds = clockTotalTime - (*statState)->lastClockTotalTime != 0 ? ( 1 - ((clockIdleTime - (*statState)->lastClockIdleTime) / ((double)(clockTotalTime - (*statState)->lastClockTotalTime)))) * returnStats.elapsed : 0;
 		(*statState)->lastClockIdleTime = clockIdleTime;
 		(*statState)->lastClockTotalTime = clockTotalTime;


@@ -247,7 +247,7 @@ struct SystemStatisticsState;
 struct IPAddress;
 
-SystemStatistics getSystemStatistics(std::string dataFolder, const IPAddress* ip, SystemStatisticsState **statState);
+SystemStatistics getSystemStatistics(std::string dataFolder, const IPAddress* ip, SystemStatisticsState **statState, bool logDetails);
 
 double getProcessorTimeThread();
@@ -272,7 +272,7 @@ void getNetworkTraffic(uint64_t& bytesSent, uint64_t& bytesReceived, uint64_t& o
 void getDiskStatistics(std::string const& directory, uint64_t& currentIOs, uint64_t& busyTicks, uint64_t& reads, uint64_t& writes, uint64_t& writeSectors);
-void getMachineLoad(uint64_t& idleTime, uint64_t& totalTime);
+void getMachineLoad(uint64_t& idleTime, uint64_t& totalTime, bool logDetails);
 
 double timer(); // Returns the system real time clock with high precision.  May jump around when system time is adjusted!
 double timer_monotonic(); // Returns a high precision monotonic clock which is adjusted to be kind of similar to timer() at startup, but might not be a globally accurate time.


@@ -45,7 +45,7 @@ SystemStatistics getSystemStatistics() {
 	static StatisticsState statState = StatisticsState();
 	const IPAddress ipAddr = machineState.ip.present() ? machineState.ip.get() : IPAddress();
 	return getSystemStatistics(
-		machineState.folder.present() ? machineState.folder.get() : "", &ipAddr, &statState.systemState);
+		machineState.folder.present() ? machineState.folder.get() : "", &ipAddr, &statState.systemState, false);
 }
 
 #define TRACEALLOCATOR( size ) TraceEvent("MemSample").detail("Count", FastAllocator<size>::getApproximateMemoryUnused()/size).detail("TotalSize", FastAllocator<size>::getApproximateMemoryUnused()).detail("SampleCount", 1).detail("Hash", "FastAllocatedUnused" #size ).detail("Bt", "na")
@@ -54,7 +54,7 @@ SystemStatistics getSystemStatistics() {
 SystemStatistics customSystemMonitor(std::string eventName, StatisticsState *statState, bool machineMetrics) {
 	const IPAddress ipAddr = machineState.ip.present() ? machineState.ip.get() : IPAddress();
 	SystemStatistics currentStats = getSystemStatistics(machineState.folder.present() ? machineState.folder.get() : "",
-														&ipAddr, &statState->systemState);
+														&ipAddr, &statState->systemState, true);
 	NetworkData netData;
 	netData.init();
 	if (!DEBUG_DETERMINISM && currentStats.initialized) {


@@ -776,13 +776,23 @@ Future<Void> setAfter( Reference<AsyncVar<T>> var, double time, T val ) {
 }
 
 ACTOR template <class T>
-Future<Void> resetAfter( Reference<AsyncVar<T>> var, double time, T val ) {
+Future<Void> resetAfter( Reference<AsyncVar<T>> var, double time, T val, int warningLimit = -1, double warningResetDelay = 0, const char* context = NULL ) {
 	state bool isEqual = var->get() == val;
 	state Future<Void> resetDelay = isEqual ? Never() : delay(time);
+	state int resetCount = 0;
+	state double lastReset = now();
 	loop {
 		choose {
 			when( wait( resetDelay ) ) {
 				var->set( val );
+				if(now() - lastReset > warningResetDelay) {
+					resetCount = 0;
+				}
+				resetCount++;
+				if(context && warningLimit >= 0 && resetCount > warningLimit) {
+					TraceEvent(SevWarnAlways, context).detail("ResetCount", resetCount).detail("LastReset", now() - lastReset);
+				}
+				lastReset = now();
 				isEqual = true;
 				resetDelay = Never();
 			}
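
A hedged usage sketch for the extended resetAfter(): the numbers and the "FlagReset" name below are illustrative, chosen only to show the new warning parameters (worker.actor.cpp earlier in this commit passes the DEGRADED_* knobs instead).

// Once 'flag' becomes true it is forced back to false after 60 seconds; if such
// resets keep occurring within 3600 seconds of each other and their count exceeds 2,
// a SevWarnAlways "FlagReset" event is traced.
Reference<AsyncVar<bool>> flag( new AsyncVar<bool>(false) );
Future<Void> resetter = resetAfter(flag, 60.0, false, 2, 3600.0, "FlagReset");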


@@ -32,7 +32,7 @@
 <Wix xmlns='http://schemas.microsoft.com/wix/2006/wi'>
     <Product Name='$(var.Title)'
-             Id='{1A6617BB-F6FE-48AE-B5FA-161D7BA3D9CD}'
+             Id='{1F036D0A-3560-4A5C-BD40-F1B254876257}'
              UpgradeCode='{A95EA002-686E-4164-8356-C715B7F8B1C8}'
              Version='$(var.Version)'
              Manufacturer='$(var.Manufacturer)'