block-5.14-2021-07-30
-----BEGIN PGP SIGNATURE----- iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmEEEz8QHGF4Ym9lQGtl cm5lbC5kawAKCRD301j7KXHgpglXD/9CGREpOf1W5oqOScpTygjehwrRnAYisQv6 Oca/qGHBa61BTN3taAJc4NMwl+IwFBER2kdTcOyz8hNmyAUPyRmFND0mG2vGTzQA P9+ekiRKCJ1aRLsnyBL0JBbmvdoPMBHz39P165vMWMrVmnlpcPKoYDS0itHtYYNP VD5Y3A9ACGMDglipDmL+3tsXQo/AoJqRO8WGMUBY2qJ0lasYuCbPpzq0kHzXi6kE 0X64bg6JOZVd3wdyWywKahW3ntsVNLswRUBzLVrnjwE29UuBGWgF+/vwyW/Ob0yS ojafKvehCYnV8Q7IatASOtbwGLvLKgpJZXf7VUEsYnSD6SnmoZctjMjRdyLhNWut lD86Y+eWjQM0pUsOVPykfrV2hd9CrhjyRFskcbI0SJRlMOl0Lstl/X17efDWcDmz 1/V8ub3gKA3HF2Gc/QKhPJDClxM7SaWnsAO3Rk+qJ6bT4EiiRg2GewI1C7YNpmGW ty1fqcQE36JtSWadH4KL/evmX258ROfn3QT1nut2jpNsd1RQ+hHBcjcfeOx6n1GX ALxT8LnmlVYbAUwQvXJcqFcft8K3JoB5ZXT74lat/CAbIKhfEUeSUiqnQcQ8kJLW MTKviuZ9eJHO6/E7vw08ARDR0PmpSFqvc6rK9DiIM/kmVDz8OdLMovTqzX/hIzUT 7IfyHzQbwg== =5FG2 -----END PGP SIGNATURE----- Merge tag 'block-5.14-2021-07-30' of git://git.kernel.dk/linux-block Pull block fixes from Jens Axboe: - gendisk freeing fix (Christoph) - blk-iocost wake ordering fix (Tejun) - tag allocation error handling fix (John) - loop locking fix. While this isn't the prettiest fix in the world, nobody has any good alternatives for 5.14. Something to likely revisit for 5.15. (Tetsuo) * tag 'block-5.14-2021-07-30' of git://git.kernel.dk/linux-block: block: delay freeing the gendisk blk-iocost: fix operation ordering in iocg_wake_fn() blk-mq-sched: Fix blk_mq_sched_alloc_tags() error handling loop: reintroduce global lock for safe loop_validate_file() traversal
This commit is contained in:
commit
4669e13cd6
|
@ -1440,16 +1440,17 @@ static int iocg_wake_fn(struct wait_queue_entry *wq_entry, unsigned mode,
|
|||
return -1;
|
||||
|
||||
iocg_commit_bio(ctx->iocg, wait->bio, wait->abs_cost, cost);
|
||||
wait->committed = true;
|
||||
|
||||
/*
|
||||
* autoremove_wake_function() removes the wait entry only when it
|
||||
* actually changed the task state. We want the wait always
|
||||
* removed. Remove explicitly and use default_wake_function().
|
||||
* actually changed the task state. We want the wait always removed.
|
||||
* Remove explicitly and use default_wake_function(). Note that the
|
||||
* order of operations is important as finish_wait() tests whether
|
||||
* @wq_entry is removed without grabbing the lock.
|
||||
*/
|
||||
list_del_init(&wq_entry->entry);
|
||||
wait->committed = true;
|
||||
|
||||
default_wake_function(wq_entry, mode, flags, key);
|
||||
list_del_init_careful(&wq_entry->entry);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -515,17 +515,6 @@ void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
|
|||
percpu_ref_put(&q->q_usage_counter);
|
||||
}
|
||||
|
||||
static void blk_mq_sched_free_tags(struct blk_mq_tag_set *set,
|
||||
struct blk_mq_hw_ctx *hctx,
|
||||
unsigned int hctx_idx)
|
||||
{
|
||||
if (hctx->sched_tags) {
|
||||
blk_mq_free_rqs(set, hctx->sched_tags, hctx_idx);
|
||||
blk_mq_free_rq_map(hctx->sched_tags, set->flags);
|
||||
hctx->sched_tags = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static int blk_mq_sched_alloc_tags(struct request_queue *q,
|
||||
struct blk_mq_hw_ctx *hctx,
|
||||
unsigned int hctx_idx)
|
||||
|
@ -539,8 +528,10 @@ static int blk_mq_sched_alloc_tags(struct request_queue *q,
|
|||
return -ENOMEM;
|
||||
|
||||
ret = blk_mq_alloc_rqs(set, hctx->sched_tags, hctx_idx, q->nr_requests);
|
||||
if (ret)
|
||||
blk_mq_sched_free_tags(set, hctx, hctx_idx);
|
||||
if (ret) {
|
||||
blk_mq_free_rq_map(hctx->sched_tags, set->flags);
|
||||
hctx->sched_tags = NULL;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -1079,10 +1079,9 @@ static void disk_release(struct device *dev)
|
|||
disk_release_events(disk);
|
||||
kfree(disk->random);
|
||||
xa_destroy(&disk->part_tbl);
|
||||
bdput(disk->part0);
|
||||
if (test_bit(GD_QUEUE_REF, &disk->state) && disk->queue)
|
||||
blk_put_queue(disk->queue);
|
||||
kfree(disk);
|
||||
bdput(disk->part0); /* frees the disk */
|
||||
}
|
||||
struct class block_class = {
|
||||
.name = "block",
|
||||
|
|
|
@ -88,6 +88,47 @@
|
|||
|
||||
static DEFINE_IDR(loop_index_idr);
|
||||
static DEFINE_MUTEX(loop_ctl_mutex);
|
||||
static DEFINE_MUTEX(loop_validate_mutex);
|
||||
|
||||
/**
|
||||
* loop_global_lock_killable() - take locks for safe loop_validate_file() test
|
||||
*
|
||||
* @lo: struct loop_device
|
||||
* @global: true if @lo is about to bind another "struct loop_device", false otherwise
|
||||
*
|
||||
* Returns 0 on success, -EINTR otherwise.
|
||||
*
|
||||
* Since loop_validate_file() traverses on other "struct loop_device" if
|
||||
* is_loop_device() is true, we need a global lock for serializing concurrent
|
||||
* loop_configure()/loop_change_fd()/__loop_clr_fd() calls.
|
||||
*/
|
||||
static int loop_global_lock_killable(struct loop_device *lo, bool global)
|
||||
{
|
||||
int err;
|
||||
|
||||
if (global) {
|
||||
err = mutex_lock_killable(&loop_validate_mutex);
|
||||
if (err)
|
||||
return err;
|
||||
}
|
||||
err = mutex_lock_killable(&lo->lo_mutex);
|
||||
if (err && global)
|
||||
mutex_unlock(&loop_validate_mutex);
|
||||
return err;
|
||||
}
|
||||
|
||||
/**
|
||||
* loop_global_unlock() - release locks taken by loop_global_lock_killable()
|
||||
*
|
||||
* @lo: struct loop_device
|
||||
* @global: true if @lo was about to bind another "struct loop_device", false otherwise
|
||||
*/
|
||||
static void loop_global_unlock(struct loop_device *lo, bool global)
|
||||
{
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
if (global)
|
||||
mutex_unlock(&loop_validate_mutex);
|
||||
}
|
||||
|
||||
static int max_part;
|
||||
static int part_shift;
|
||||
|
@ -672,13 +713,15 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
|
|||
while (is_loop_device(f)) {
|
||||
struct loop_device *l;
|
||||
|
||||
lockdep_assert_held(&loop_validate_mutex);
|
||||
if (f->f_mapping->host->i_rdev == bdev->bd_dev)
|
||||
return -EBADF;
|
||||
|
||||
l = I_BDEV(f->f_mapping->host)->bd_disk->private_data;
|
||||
if (l->lo_state != Lo_bound) {
|
||||
if (l->lo_state != Lo_bound)
|
||||
return -EINVAL;
|
||||
}
|
||||
/* Order wrt setting lo->lo_backing_file in loop_configure(). */
|
||||
rmb();
|
||||
f = l->lo_backing_file;
|
||||
}
|
||||
if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode))
|
||||
|
@ -697,13 +740,18 @@ static int loop_validate_file(struct file *file, struct block_device *bdev)
|
|||
static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
|
||||
unsigned int arg)
|
||||
{
|
||||
struct file *file = NULL, *old_file;
|
||||
int error;
|
||||
bool partscan;
|
||||
struct file *file = fget(arg);
|
||||
struct file *old_file;
|
||||
int error;
|
||||
bool partscan;
|
||||
bool is_loop;
|
||||
|
||||
error = mutex_lock_killable(&lo->lo_mutex);
|
||||
if (!file)
|
||||
return -EBADF;
|
||||
is_loop = is_loop_device(file);
|
||||
error = loop_global_lock_killable(lo, is_loop);
|
||||
if (error)
|
||||
return error;
|
||||
goto out_putf;
|
||||
error = -ENXIO;
|
||||
if (lo->lo_state != Lo_bound)
|
||||
goto out_err;
|
||||
|
@ -713,11 +761,6 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
|
|||
if (!(lo->lo_flags & LO_FLAGS_READ_ONLY))
|
||||
goto out_err;
|
||||
|
||||
error = -EBADF;
|
||||
file = fget(arg);
|
||||
if (!file)
|
||||
goto out_err;
|
||||
|
||||
error = loop_validate_file(file, bdev);
|
||||
if (error)
|
||||
goto out_err;
|
||||
|
@ -740,7 +783,16 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
|
|||
loop_update_dio(lo);
|
||||
blk_mq_unfreeze_queue(lo->lo_queue);
|
||||
partscan = lo->lo_flags & LO_FLAGS_PARTSCAN;
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
loop_global_unlock(lo, is_loop);
|
||||
|
||||
/*
|
||||
* Flush loop_validate_file() before fput(), for l->lo_backing_file
|
||||
* might be pointing at old_file which might be the last reference.
|
||||
*/
|
||||
if (!is_loop) {
|
||||
mutex_lock(&loop_validate_mutex);
|
||||
mutex_unlock(&loop_validate_mutex);
|
||||
}
|
||||
/*
|
||||
* We must drop file reference outside of lo_mutex as dropping
|
||||
* the file ref can take open_mutex which creates circular locking
|
||||
|
@ -752,9 +804,9 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
|
|||
return 0;
|
||||
|
||||
out_err:
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
if (file)
|
||||
fput(file);
|
||||
loop_global_unlock(lo, is_loop);
|
||||
out_putf:
|
||||
fput(file);
|
||||
return error;
|
||||
}
|
||||
|
||||
|
@ -1136,22 +1188,22 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
|
|||
struct block_device *bdev,
|
||||
const struct loop_config *config)
|
||||
{
|
||||
struct file *file;
|
||||
struct inode *inode;
|
||||
struct file *file = fget(config->fd);
|
||||
struct inode *inode;
|
||||
struct address_space *mapping;
|
||||
int error;
|
||||
loff_t size;
|
||||
bool partscan;
|
||||
unsigned short bsize;
|
||||
int error;
|
||||
loff_t size;
|
||||
bool partscan;
|
||||
unsigned short bsize;
|
||||
bool is_loop;
|
||||
|
||||
if (!file)
|
||||
return -EBADF;
|
||||
is_loop = is_loop_device(file);
|
||||
|
||||
/* This is safe, since we have a reference from open(). */
|
||||
__module_get(THIS_MODULE);
|
||||
|
||||
error = -EBADF;
|
||||
file = fget(config->fd);
|
||||
if (!file)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* If we don't hold exclusive handle for the device, upgrade to it
|
||||
* here to avoid changing device under exclusive owner.
|
||||
|
@ -1162,7 +1214,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
|
|||
goto out_putf;
|
||||
}
|
||||
|
||||
error = mutex_lock_killable(&lo->lo_mutex);
|
||||
error = loop_global_lock_killable(lo, is_loop);
|
||||
if (error)
|
||||
goto out_bdev;
|
||||
|
||||
|
@ -1242,6 +1294,9 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
|
|||
size = get_loop_size(lo, file);
|
||||
loop_set_size(lo, size);
|
||||
|
||||
/* Order wrt reading lo_state in loop_validate_file(). */
|
||||
wmb();
|
||||
|
||||
lo->lo_state = Lo_bound;
|
||||
if (part_shift)
|
||||
lo->lo_flags |= LO_FLAGS_PARTSCAN;
|
||||
|
@ -1253,7 +1308,7 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
|
|||
* put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev).
|
||||
*/
|
||||
bdgrab(bdev);
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
loop_global_unlock(lo, is_loop);
|
||||
if (partscan)
|
||||
loop_reread_partitions(lo);
|
||||
if (!(mode & FMODE_EXCL))
|
||||
|
@ -1261,13 +1316,12 @@ static int loop_configure(struct loop_device *lo, fmode_t mode,
|
|||
return 0;
|
||||
|
||||
out_unlock:
|
||||
mutex_unlock(&lo->lo_mutex);
|
||||
loop_global_unlock(lo, is_loop);
|
||||
out_bdev:
|
||||
if (!(mode & FMODE_EXCL))
|
||||
bd_abort_claiming(bdev, loop_configure);
|
||||
out_putf:
|
||||
fput(file);
|
||||
out:
|
||||
/* This is safe: open() is still holding a reference. */
|
||||
module_put(THIS_MODULE);
|
||||
return error;
|
||||
|
@ -1283,6 +1337,18 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
|
|||
int lo_number;
|
||||
struct loop_worker *pos, *worker;
|
||||
|
||||
/*
|
||||
* Flush loop_configure() and loop_change_fd(). It is acceptable for
|
||||
* loop_validate_file() to succeed, for actual clear operation has not
|
||||
* started yet.
|
||||
*/
|
||||
mutex_lock(&loop_validate_mutex);
|
||||
mutex_unlock(&loop_validate_mutex);
|
||||
/*
|
||||
* loop_validate_file() now fails because l->lo_state != Lo_bound
|
||||
* became visible.
|
||||
*/
|
||||
|
||||
mutex_lock(&lo->lo_mutex);
|
||||
if (WARN_ON_ONCE(lo->lo_state != Lo_rundown)) {
|
||||
err = -ENXIO;
|
||||
|
|
|
@ -812,6 +812,8 @@ static void bdev_free_inode(struct inode *inode)
|
|||
free_percpu(bdev->bd_stats);
|
||||
kfree(bdev->bd_meta_info);
|
||||
|
||||
if (!bdev_is_partition(bdev))
|
||||
kfree(bdev->bd_disk);
|
||||
kmem_cache_free(bdev_cachep, BDEV_I(inode));
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue