drbd: Implemented the disk-timeout option
When the disk-timeout is active and it expires for a single request, we treat the local disk as D_FAILED. Note: With this change, I made both timeout-based state transitions HARD state transitions. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
This commit is contained in:
parent
02ee8f95fa
commit
dfa8bedbfe
|
@ -1404,6 +1404,9 @@ static void after_state_ch(struct drbd_conf *mdev, union drbd_state os,
|
|||
/* Here we have the actions that are performed after a
|
||||
state change. This function might sleep */
|
||||
|
||||
if (os.disk <= D_NEGOTIATING && ns.disk > D_NEGOTIATING)
|
||||
mod_timer(&mdev->request_timer, jiffies + HZ);
|
||||
|
||||
nsm.i = -1;
|
||||
if (ns.susp_nod) {
|
||||
if (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED)
|
||||
|
@ -3318,6 +3321,8 @@ static void drbd_delete_device(unsigned int minor)
|
|||
if (!mdev)
|
||||
return;
|
||||
|
||||
del_timer_sync(&mdev->request_timer);
|
||||
|
||||
/* paranoia asserts */
|
||||
if (mdev->open_cnt != 0)
|
||||
dev_err(DEV, "open_cnt = %d in %s:%u", mdev->open_cnt,
|
||||
|
|
|
@ -3803,8 +3803,6 @@ static void drbd_disconnect(struct drbd_conf *mdev)
|
|||
atomic_set(&mdev->rs_pending_cnt, 0);
|
||||
wake_up(&mdev->misc_wait);
|
||||
|
||||
del_timer(&mdev->request_timer);
|
||||
|
||||
/* make sure syncer is stopped and w_resume_next_sg queued */
|
||||
del_timer_sync(&mdev->resync_timer);
|
||||
resync_timer_fn((unsigned long)mdev);
|
||||
|
|
|
@ -1208,13 +1208,19 @@ void request_timer_fn(unsigned long data)
|
|||
struct drbd_conf *mdev = (struct drbd_conf *) data;
|
||||
struct drbd_request *req; /* oldest request */
|
||||
struct list_head *le;
|
||||
unsigned long et = 0; /* effective timeout = ko_count * timeout */
|
||||
unsigned long ent = 0, dt = 0, et, nt; /* effective timeout = ko_count * timeout */
|
||||
|
||||
if (get_net_conf(mdev)) {
|
||||
et = mdev->net_conf->timeout*HZ/10 * mdev->net_conf->ko_count;
|
||||
ent = mdev->net_conf->timeout*HZ/10 * mdev->net_conf->ko_count;
|
||||
put_net_conf(mdev);
|
||||
}
|
||||
if (!et || mdev->state.conn < C_WF_REPORT_PARAMS)
|
||||
if (get_ldev(mdev)) {
|
||||
dt = mdev->ldev->dc.disk_timeout * HZ / 10;
|
||||
put_ldev(mdev);
|
||||
}
|
||||
et = min_not_zero(dt, ent);
|
||||
|
||||
if (!et || (mdev->state.conn < C_WF_REPORT_PARAMS && mdev->state.disk <= D_FAILED))
|
||||
return; /* Recurring timer stopped */
|
||||
|
||||
spin_lock_irq(&mdev->req_lock);
|
||||
|
@ -1227,17 +1233,19 @@ void request_timer_fn(unsigned long data)
|
|||
|
||||
le = le->prev;
|
||||
req = list_entry(le, struct drbd_request, tl_requests);
|
||||
if (time_is_before_eq_jiffies(req->start_time + et)) {
|
||||
if (req->rq_state & RQ_NET_PENDING) {
|
||||
if (ent && req->rq_state & RQ_NET_PENDING) {
|
||||
if (time_is_before_eq_jiffies(req->start_time + ent)) {
|
||||
dev_warn(DEV, "Remote failed to finish a request within ko-count * timeout\n");
|
||||
_drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE, NULL);
|
||||
} else {
|
||||
dev_warn(DEV, "Local backing block device frozen?\n");
|
||||
mod_timer(&mdev->request_timer, jiffies + et);
|
||||
_drbd_set_state(_NS(mdev, conn, C_TIMEOUT), CS_VERBOSE | CS_HARD, NULL);
|
||||
}
|
||||
} else {
|
||||
mod_timer(&mdev->request_timer, req->start_time + et);
|
||||
}
|
||||
|
||||
if (dt && req->rq_state & RQ_LOCAL_PENDING) {
|
||||
if (time_is_before_eq_jiffies(req->start_time + dt)) {
|
||||
dev_warn(DEV, "Local backing device failed to meet the disk-timeout\n");
|
||||
__drbd_chk_io_error(mdev, 1);
|
||||
}
|
||||
}
|
||||
nt = (time_is_before_eq_jiffies(req->start_time + et) ? jiffies : req->start_time) + et;
|
||||
spin_unlock_irq(&mdev->req_lock);
|
||||
mod_timer(&mdev->request_timer, nt);
|
||||
}
|
||||
|
|
|
@ -48,6 +48,11 @@
|
|||
#define DRBD_TIMEOUT_MAX 600
|
||||
#define DRBD_TIMEOUT_DEF 60 /* 6 seconds */
|
||||
|
||||
/* If the backing disk takes longer than disk_timeout (in units of 0.1 seconds), mark the disk as failed */
|
||||
#define DRBD_DISK_TIMEOUT_MIN 0 /* 0 = disabled */
|
||||
#define DRBD_DISK_TIMEOUT_MAX 6000 /* 10 Minutes */
|
||||
#define DRBD_DISK_TIMEOUT_DEF 0 /* disabled */
|
||||
|
||||
/* active connection retries when C_WF_CONNECTION */
|
||||
#define DRBD_CONNECT_INT_MIN 1
|
||||
#define DRBD_CONNECT_INT_MAX 120
|
||||
|
|
|
@ -31,6 +31,7 @@ NL_PACKET(disk_conf, 3,
|
|||
NL_INTEGER( 56, T_MAY_IGNORE, max_bio_bvecs)
|
||||
NL_BIT( 57, T_MAY_IGNORE, no_disk_barrier)
|
||||
NL_BIT( 58, T_MAY_IGNORE, no_disk_drain)
|
||||
NL_INTEGER( 89, T_MAY_IGNORE, disk_timeout)
|
||||
)
|
||||
|
||||
NL_PACKET(detach, 4,
|
||||
|
|
Loading…
Reference in New Issue