drbd: make suspend_io() / resume_io() thread and recursion safe
Avoid prematurely resuming application IO: instead of setting/clearing a single bit, increment/decrement an atomic counter. Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com> Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com> Signed-off-by: Jens Axboe <axboe@fb.com>
This commit is contained in:
parent
f85d9f2d02
commit
7dbb4386b9
|
@ -500,7 +500,6 @@ enum {
|
||||||
|
|
||||||
MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */
|
MD_NO_FUA, /* Users wants us to not use FUA/FLUSH on meta data dev */
|
||||||
|
|
||||||
SUSPEND_IO, /* suspend application io */
|
|
||||||
BITMAP_IO, /* suspend application io;
|
BITMAP_IO, /* suspend application io;
|
||||||
once no more io in flight, start bitmap io */
|
once no more io in flight, start bitmap io */
|
||||||
BITMAP_IO_QUEUED, /* Started bitmap IO */
|
BITMAP_IO_QUEUED, /* Started bitmap IO */
|
||||||
|
@ -880,6 +879,7 @@ struct drbd_device {
|
||||||
atomic_t rs_pending_cnt; /* RS request/data packets on the wire */
|
atomic_t rs_pending_cnt; /* RS request/data packets on the wire */
|
||||||
atomic_t unacked_cnt; /* Need to send replies for */
|
atomic_t unacked_cnt; /* Need to send replies for */
|
||||||
atomic_t local_cnt; /* Waiting for local completion */
|
atomic_t local_cnt; /* Waiting for local completion */
|
||||||
|
atomic_t suspend_cnt;
|
||||||
|
|
||||||
/* Interval tree of pending local requests */
|
/* Interval tree of pending local requests */
|
||||||
struct rb_root read_requests;
|
struct rb_root read_requests;
|
||||||
|
@ -2263,7 +2263,7 @@ static inline bool may_inc_ap_bio(struct drbd_device *device)
|
||||||
|
|
||||||
if (drbd_suspended(device))
|
if (drbd_suspended(device))
|
||||||
return false;
|
return false;
|
||||||
if (test_bit(SUSPEND_IO, &device->flags))
|
if (atomic_read(&device->suspend_cnt))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
/* to avoid potential deadlock or bitmap corruption,
|
/* to avoid potential deadlock or bitmap corruption,
|
||||||
|
|
|
@ -865,9 +865,11 @@ char *ppsize(char *buf, unsigned long long size)
|
||||||
* and can be long lived.
|
* and can be long lived.
|
||||||
* This changes an device->flag, is triggered by drbd internals,
|
* This changes an device->flag, is triggered by drbd internals,
|
||||||
* and should be short-lived. */
|
* and should be short-lived. */
|
||||||
|
/* It needs to be a counter, since multiple threads might
|
||||||
|
independently suspend and resume IO. */
|
||||||
void drbd_suspend_io(struct drbd_device *device)
|
void drbd_suspend_io(struct drbd_device *device)
|
||||||
{
|
{
|
||||||
set_bit(SUSPEND_IO, &device->flags);
|
atomic_inc(&device->suspend_cnt);
|
||||||
if (drbd_suspended(device))
|
if (drbd_suspended(device))
|
||||||
return;
|
return;
|
||||||
wait_event(device->misc_wait, !atomic_read(&device->ap_bio_cnt));
|
wait_event(device->misc_wait, !atomic_read(&device->ap_bio_cnt));
|
||||||
|
@ -875,8 +877,8 @@ void drbd_suspend_io(struct drbd_device *device)
|
||||||
|
|
||||||
void drbd_resume_io(struct drbd_device *device)
|
void drbd_resume_io(struct drbd_device *device)
|
||||||
{
|
{
|
||||||
clear_bit(SUSPEND_IO, &device->flags);
|
if (atomic_dec_and_test(&device->suspend_cnt))
|
||||||
wake_up(&device->misc_wait);
|
wake_up(&device->misc_wait);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -1484,7 +1484,7 @@ int drbd_bitmap_io_from_worker(struct drbd_device *device,
|
||||||
D_ASSERT(device, current == first_peer_device(device)->connection->worker.task);
|
D_ASSERT(device, current == first_peer_device(device)->connection->worker.task);
|
||||||
|
|
||||||
/* open coded non-blocking drbd_suspend_io(device); */
|
/* open coded non-blocking drbd_suspend_io(device); */
|
||||||
set_bit(SUSPEND_IO, &device->flags);
|
atomic_inc(&device->suspend_cnt);
|
||||||
|
|
||||||
drbd_bm_lock(device, why, flags);
|
drbd_bm_lock(device, why, flags);
|
||||||
rv = io_fn(device);
|
rv = io_fn(device);
|
||||||
|
|
Loading…
Reference in New Issue