2017-05-26 04:48:44 +08:00
|
|
|
/*
|
|
|
|
* WaitFailure.actor.cpp
|
|
|
|
*
|
|
|
|
* This source file is part of the FoundationDB open source project
|
|
|
|
*
|
2022-03-22 04:36:23 +08:00
|
|
|
* Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
|
2018-02-22 02:25:11 +08:00
|
|
|
*
|
2017-05-26 04:48:44 +08:00
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
2018-02-22 02:25:11 +08:00
|
|
|
*
|
2017-05-26 04:48:44 +08:00
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
2018-02-22 02:25:11 +08:00
|
|
|
*
|
2017-05-26 04:48:44 +08:00
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "fdbrpc/FailureMonitor.h"
|
|
|
|
#include "flow/Deque.h"
|
2018-10-20 01:30:13 +08:00
|
|
|
#include "fdbserver/Knobs.h"
|
2018-08-11 06:18:24 +08:00
|
|
|
#include "flow/actorcompiler.h" // This must be the last #include.
|
2017-05-26 04:48:44 +08:00
|
|
|
|
|
|
|
// Server half of the wait-failure protocol.
//
// Accepts requests arriving on `waitFailure` and parks them without replying. Once more than
// SERVER_KNOBS->MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS requests are parked, the oldest one is
// answered with Void() and dropped so the backlog stays bounded.
//
// This actor never returns on its own; it runs until it is cancelled.
ACTOR Future<Void> waitFailureServer(FutureStream<ReplyPromise<Void>> waitFailure) {
	// Parked requests are intentionally left unanswered: when this actor is cancelled the
	// queue is destroyed and the promises still in it send broken_promise.
	state Deque<ReplyPromise<Void>> pending;
	loop {
		ReplyPromise<Void> request = waitNext(waitFailure);
		pending.push_back(request);

		// Nothing more to do while the backlog is within its limit.
		if (!(pending.size() > SERVER_KNOBS->MAX_OUTSTANDING_WAIT_FAILURE_REQUESTS)) {
			continue;
		}

		// Over the limit: release the oldest parked request with a normal reply.
		CODE_PROBE(true, "wait server queue full");
		pending.front().send(Void());
		pending.pop_front();
	}
}
|
|
|
|
|
2020-07-11 06:53:21 +08:00
|
|
|
// Client half of the wait-failure protocol.
//
// Repeatedly sends an empty request on `waitFailure` via getReplyUnlessFailedFor() and returns
// Void() the first time that call yields an error instead of a reply (presumably because the
// endpoint is considered failed per reactionTime/reactionSlope -- confirm against
// RequestStream::getReplyUnlessFailedFor).
//
//   waitFailure    stream the requests are sent on
//   reactionTime   forwarded unchanged to getReplyUnlessFailedFor()
//   reactionSlope  forwarded unchanged to getReplyUnlessFailedFor()
//   trace          when true, emit a "WaitFailureClient" trace event before returning
//   taskID         priority used for the request and for the pacing delay
//
// actor_cancelled is rethrown; any other error is logged at SevError and trips ASSERT(false).
ACTOR Future<Void> waitFailureClient(RequestStream<ReplyPromise<Void>> waitFailure,
                                     double reactionTime,
                                     double reactionSlope,
                                     bool trace,
                                     TaskPriority taskID) {
	loop {
		try {
			state double requestStart = now();
			ErrorOr<Void> reply =
			    wait(waitFailure.getReplyUnlessFailedFor(ReplyPromise<Void>(), reactionTime, reactionSlope, taskID));

			if (reply.present()) {
				// A reply arrived. Pace the loop so consecutive requests start at least
				// WAIT_FAILURE_DELAY_LIMIT apart.
				double remaining = requestStart + SERVER_KNOBS->WAIT_FAILURE_DELAY_LIMIT - now();
				if (remaining > 0) {
					wait(delay(remaining, taskID));
				}
			} else {
				// No reply value: report (optionally) and finish.
				if (trace) {
					TraceEvent("WaitFailureClient")
					    .detail("FailedEndpoint", waitFailure.getEndpoint().getPrimaryAddress().toString())
					    .detail("Token", waitFailure.getEndpoint().token);
				}
				return Void();
			}
		} catch (Error& e) {
			// Cancellation must propagate untouched.
			if (e.code() == error_code_actor_cancelled) {
				throw;
			}
			TraceEvent(SevError, "WaitFailureClientError").error(e);
			ASSERT(false); // unknown error from waitFailureServer
		}
	}
}
|
|
|
|
|
2019-06-25 17:47:35 +08:00
|
|
|
// Stricter variant of waitFailureClient().
//
// Each round first waits for waitFailureClient() (with zero reaction time/slope and tracing
// off) to finish, then waits until either `failureReactionTime` seconds have elapsed or the
// failure monitor reports the endpoint's state equal to FailureStatus(false). Returns Void()
// only if the failure monitor reports the endpoint as failed after that wait; otherwise it
// starts another round.
//
//   waitFailure          stream whose endpoint is being watched
//   failureReactionTime  upper bound on the post-failure wait in each round
//   taskID               priority used for the nested client and the delay
ACTOR Future<Void> waitFailureClientStrict(RequestStream<ReplyPromise<Void>> waitFailure,
                                           double failureReactionTime,
                                           TaskPriority taskID) {
	loop {
		wait(waitFailureClient(waitFailure, /*reactionTime=*/0, /*reactionSlope=*/0, /*trace=*/false, taskID));

		// Wait for the timeout or for the monitor to report FailureStatus(false), whichever
		// comes first.
		wait(delay(failureReactionTime, taskID) ||
		     IFailureMonitor::failureMonitor().onStateEqual(waitFailure.getEndpoint(), FailureStatus(false)));

		bool stillFailed = IFailureMonitor::failureMonitor().getState(waitFailure.getEndpoint()).isFailed();
		if (stillFailed) {
			return Void();
		}
	}
}
|
|
|
|
|
2019-06-25 17:47:35 +08:00
|
|
|
// Continuously mirrors the failure state of `waitFailure`'s endpoint into `failed`.
//
// Every iteration publishes the failure monitor's current isFailed() value to `failed`, then:
//   - if not failed, sends an empty request via getReplyUnlessFailedFor() and, when a reply
//     arrives, paces the loop so requests start at least WAIT_FAILURE_DELAY_LIMIT apart;
//   - if failed, sleeps until the failure monitor reports a state change for the endpoint.
//
//   waitFailure    stream whose endpoint is being tracked
//   failed         output variable updated at the top of every iteration
//   reactionTime   forwarded unchanged to getReplyUnlessFailedFor()
//   reactionSlope  forwarded unchanged to getReplyUnlessFailedFor()
//   taskID         priority used for the request and for the pacing delay
//
// This actor never returns on its own. actor_cancelled is rethrown; any other error is logged
// at SevError and trips ASSERT(false).
ACTOR Future<Void> waitFailureTracker(RequestStream<ReplyPromise<Void>> waitFailure,
                                      Reference<AsyncVar<bool>> failed,
                                      double reactionTime,
                                      double reactionSlope,
                                      TaskPriority taskID) {
	loop {
		try {
			failed->set(IFailureMonitor::failureMonitor().getState(waitFailure.getEndpoint()).isFailed());

			if (!failed->get()) {
				state double requestStart = now();
				ErrorOr<Void> reply = wait(
				    waitFailure.getReplyUnlessFailedFor(ReplyPromise<Void>(), reactionTime, reactionSlope, taskID));
				if (reply.present()) {
					// Got a reply: throttle before probing again.
					double remaining = requestStart + SERVER_KNOBS->WAIT_FAILURE_DELAY_LIMIT - now();
					if (remaining > 0) {
						wait(delay(remaining, taskID));
					}
				}
				// With no reply value we simply loop and re-read the monitor state.
			} else {
				// Currently failed: wait for the monitor to report any change.
				wait(IFailureMonitor::failureMonitor().onStateChanged(waitFailure.getEndpoint()));
			}
		} catch (Error& e) {
			// Cancellation must propagate untouched.
			if (e.code() == error_code_actor_cancelled) {
				throw;
			}
			// NOTE(review): this event name is the same one waitFailureClient emits --
			// confirm that sharing it is intentional before renaming.
			TraceEvent(SevError, "WaitFailureClientError").error(e);
			ASSERT(false); // unknown error from waitFailureServer
		}
	}
}
|