drbd: Bugfix for the connection behavior
If we get into the C_BROKEN_PIPE cstate once, the state engine sets the thi->t_state of the receiver thread to restarting. But with the while loop in drbdd_init() a new connection gets established. After that, the call into drbdd() returns immediately since thi->t_state is not RUNNING. The restart of drbdd_init() then resets thi->t_state to RUNNING. I.e. after entering C_BROKEN_PIPE once, the next successfully established connection gets wasted. The two parts of the fix: * Do not cause the thread to restart if we detect the issue with the sockets while we are in C_WF_CONNECTION. * Make sure that all actions that would have set us to C_BROKEN_PIPE happen before the state change to C_WF_REPORT_PARAMS. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
This commit is contained in:
parent
80f9fd55a6
commit
1e86ac48af
|
@ -1318,7 +1318,7 @@ __drbd_set_state(struct drbd_conf *mdev, union drbd_state ns,
|
|||
drbd_thread_stop_nowait(&mdev->receiver);
|
||||
|
||||
/* Upon network failure, we need to restart the receiver. */
|
||||
-if (os.conn > C_TEAR_DOWN &&
|
||||
+if (os.conn > C_WF_CONNECTION &&
|
||||
ns.conn <= C_TEAR_DOWN && ns.conn >= C_TIMEOUT)
|
||||
drbd_thread_restart_nowait(&mdev->receiver);
|
||||
|
||||
|
|
|
@ -888,17 +888,12 @@ retry:
|
|||
}
|
||||
}
|
||||
|
||||
if (drbd_request_state(mdev, NS(conn, C_WF_REPORT_PARAMS)) < SS_SUCCESS)
|
||||
return 0;
|
||||
|
||||
sock->sk->sk_sndtimeo = mdev->net_conf->timeout*HZ/10;
|
||||
sock->sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
|
||||
|
||||
atomic_set(&mdev->packet_seq, 0);
|
||||
mdev->peer_seq = 0;
|
||||
|
||||
drbd_thread_start(&mdev->asender);
|
||||
|
||||
if (drbd_send_protocol(mdev) == -1)
|
||||
return -1;
|
||||
drbd_send_sync_param(mdev, &mdev->sync_conf);
|
||||
|
@ -907,6 +902,11 @@ retry:
|
|||
drbd_send_state(mdev);
|
||||
clear_bit(USE_DEGR_WFC_T, &mdev->flags);
|
||||
clear_bit(RESIZE_PENDING, &mdev->flags);
|
||||
|
||||
if (drbd_request_state(mdev, NS(conn, C_WF_REPORT_PARAMS)) < SS_SUCCESS)
|
||||
return 0;
|
||||
|
||||
drbd_thread_start(&mdev->asender);
|
||||
mod_timer(&mdev->request_timer, jiffies + HZ); /* just start it here. */
|
||||
|
||||
return 1;
|
||||
|
|
Loading…
Reference in New Issue