fdbrpc: Add warning when peer is unavailable for a long time
This commit is contained in:
parent
6b8daeae6e
commit
598b2eaeb0
|
@ -403,6 +403,7 @@ ACTOR Future<Void> connectionKeeper( Reference<Peer> self,
|
|||
.detail("PeerAddr", self->destination)
|
||||
.detail("ConnSet", (bool)conn);
|
||||
|
||||
state Optional<double> firstConnFailedTime = Optional<double>();
|
||||
loop {
|
||||
try {
|
||||
if (!conn) { // Always, except for the first loop with an incoming connection
|
||||
|
@ -464,6 +465,7 @@ ACTOR Future<Void> connectionKeeper( Reference<Peer> self,
|
|||
self->outgoingConnectionIdle = false;
|
||||
}
|
||||
|
||||
firstConnFailedTime.reset();
|
||||
try {
|
||||
self->transport->countConnEstablished++;
|
||||
wait( connectionWriter( self, conn ) || reader || connectionMonitor(self) );
|
||||
|
@ -485,6 +487,17 @@ ACTOR Future<Void> connectionKeeper( Reference<Peer> self,
|
|||
} else {
|
||||
self->reconnectionDelay = std::min(FLOW_KNOBS->MAX_RECONNECTION_TIME, self->reconnectionDelay * FLOW_KNOBS->RECONNECTION_TIME_GROWTH_RATE);
|
||||
}
|
||||
|
||||
if (firstConnFailedTime.present()) {
|
||||
if (now() - firstConnFailedTime.get() > FLOW_KNOBS->PEER_UNAVAILABLE_FOR_LONG_TIME_TIMEOUT) {
|
||||
TraceEvent(SevWarnAlways, "PeerUnavailableForLongTime", conn ? conn->getDebugID() : UID())
|
||||
.detail("PeerAddr", self->destination);
|
||||
firstConnFailedTime = now() - FLOW_KNOBS->PEER_UNAVAILABLE_FOR_LONG_TIME_TIMEOUT/2.0;
|
||||
}
|
||||
} else {
|
||||
firstConnFailedTime = now();
|
||||
}
|
||||
|
||||
self->discardUnreliablePackets();
|
||||
reader = Future<Void>();
|
||||
bool ok = e.code() == error_code_connection_failed || e.code() == error_code_actor_cancelled ||
|
||||
|
|
|
@ -72,6 +72,7 @@ FlowKnobs::FlowKnobs(bool randomize, bool isSimulated) {
|
|||
init( USE_OBJECT_SERIALIZER, 1 );
|
||||
init( TOO_MANY_CONNECTIONS_CLOSED_RESET_DELAY, 5.0 );
|
||||
init( TOO_MANY_CONNECTIONS_CLOSED_TIMEOUT, 20.0 );
|
||||
init( PEER_UNAVAILABLE_FOR_LONG_TIME_TIMEOUT, 3600.0 );
|
||||
|
||||
init( TLS_CERT_REFRESH_DELAY_SECONDS, 12*60*60 );
|
||||
|
||||
|
|
|
@ -106,6 +106,7 @@ public:
|
|||
double PAGE_CACHE_TRUNCATE_LOOKUP_FRACTION;
|
||||
double TOO_MANY_CONNECTIONS_CLOSED_RESET_DELAY;
|
||||
int TOO_MANY_CONNECTIONS_CLOSED_TIMEOUT;
|
||||
int PEER_UNAVAILABLE_FOR_LONG_TIME_TIMEOUT;
|
||||
|
||||
//AsyncFileEIO
|
||||
int EIO_MAX_PARALLELISM;
|
||||
|
|
Loading…
Reference in New Issue