rbd: don't hold lock_rwsem while running_list is being drained
Currently rbd_quiesce_lock() holds lock_rwsem for read while blocking on releasing_wait completion. On the I/O completion side, each image request also needs to take lock_rwsem for read. Because rw_semaphore implementation doesn't allow new readers after a writer has indicated interest in the lock, this can result in a deadlock if something that needs to take lock_rwsem for write gets involved. For example: 1. watch error occurs 2. rbd_watch_errcb() takes lock_rwsem for write, clears owner_cid and releases lock_rwsem 3. after reestablishing the watch, rbd_reregister_watch() takes lock_rwsem for write and calls rbd_reacquire_lock() 4. rbd_quiesce_lock() downgrades lock_rwsem to for read and blocks on releasing_wait until running_list becomes empty 5. another watch error occurs 6. rbd_watch_errcb() blocks trying to take lock_rwsem for write 7. no in-flight image request can complete and delete itself from running_list because lock_rwsem won't be granted anymore A similar scenario can occur with "lock has been acquired" and "lock has been released" notification handers which also take lock_rwsem for write to update owner_cid. We don't actually get anything useful from sitting on lock_rwsem in rbd_quiesce_lock() -- owner_cid updates certainly don't need to be synchronized with. In fact the whole owner_cid tracking logic could probably be removed from the kernel client because we don't support proxied maintenance operations. Cc: stable@vger.kernel.org # 5.3+ URL: https://tracker.ceph.com/issues/42757 Signed-off-by: Ilya Dryomov <idryomov@gmail.com> Tested-by: Robin Geuze <robin.geuze@nl.team.blue>
This commit is contained in:
parent
8798d070d4
commit
ed9eb71085
|
@ -4100,8 +4100,6 @@ again:
|
|||
|
||||
static bool rbd_quiesce_lock(struct rbd_device *rbd_dev)
|
||||
{
|
||||
bool need_wait;
|
||||
|
||||
dout("%s rbd_dev %p\n", __func__, rbd_dev);
|
||||
lockdep_assert_held_write(&rbd_dev->lock_rwsem);
|
||||
|
||||
|
@ -4113,11 +4111,11 @@ static bool rbd_quiesce_lock(struct rbd_device *rbd_dev)
|
|||
*/
|
||||
rbd_dev->lock_state = RBD_LOCK_STATE_RELEASING;
|
||||
rbd_assert(!completion_done(&rbd_dev->releasing_wait));
|
||||
need_wait = !list_empty(&rbd_dev->running_list);
|
||||
downgrade_write(&rbd_dev->lock_rwsem);
|
||||
if (need_wait)
|
||||
wait_for_completion(&rbd_dev->releasing_wait);
|
||||
up_read(&rbd_dev->lock_rwsem);
|
||||
if (list_empty(&rbd_dev->running_list))
|
||||
return true;
|
||||
|
||||
up_write(&rbd_dev->lock_rwsem);
|
||||
wait_for_completion(&rbd_dev->releasing_wait);
|
||||
|
||||
down_write(&rbd_dev->lock_rwsem);
|
||||
if (rbd_dev->lock_state != RBD_LOCK_STATE_RELEASING)
|
||||
|
|
Loading…
Reference in New Issue