genhd: Fix BUG in blkdev_open()
When two blkdev_open() calls for a partition race with device removal and recreation, we can hit BUG_ON(!bd_may_claim(bdev, whole, holder)) in blkdev_open(). The race can happen as follows: CPU0 CPU1 CPU2 del_gendisk() bdev_unhash_inode(part1); blkdev_open(part1, O_EXCL) blkdev_open(part1, O_EXCL) bdev = bd_acquire() bdev = bd_acquire() blkdev_get(bdev) bd_start_claiming(bdev) - finds old inode 'whole' bd_prepare_to_claim() -> 0 bdev_unhash_inode(whole); <device removed> <new device under same number created> blkdev_get(bdev); bd_start_claiming(bdev) - finds new inode 'whole' bd_prepare_to_claim() - this also succeeds as we have different 'whole' here... - bad things happen now as we have two exclusive openers of the same bdev The problem here is that block device opens can see various intermediate states while gendisk is shutting down and then being recreated. We fix the problem by introducing new lookup_sem in gendisk that synchronizes gendisk deletion with get_gendisk() and furthermore by making sure that get_gendisk() does not return gendisk that is being (or has been) deleted. This makes sure that once we ever manage to look up newly created bdev inode, we are also guaranteed that following get_gendisk() will either return failure (and we fail open) or it returns gendisk for the new device and following bdget_disk() will return new bdev inode (i.e., blkdev_open() follows the path as if it is completely run after new device is created). Reported-and-analyzed-by: Hou Tao <houtao1@huawei.com> Tested-by: Hou Tao <houtao1@huawei.com> Signed-off-by: Jan Kara <jack@suse.cz> Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
897366537f
commit
56c0908c85
|
@ -717,6 +717,11 @@ void del_gendisk(struct gendisk *disk)
|
||||||
blk_integrity_del(disk);
|
blk_integrity_del(disk);
|
||||||
disk_del_events(disk);
|
disk_del_events(disk);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Block lookups of the disk until all bdevs are unhashed and the
|
||||||
|
* disk is marked as dead (GENHD_FL_UP cleared).
|
||||||
|
*/
|
||||||
|
down_write(&disk->lookup_sem);
|
||||||
/* invalidate stuff */
|
/* invalidate stuff */
|
||||||
disk_part_iter_init(&piter, disk,
|
disk_part_iter_init(&piter, disk,
|
||||||
DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
|
DISK_PITER_INCL_EMPTY | DISK_PITER_REVERSE);
|
||||||
|
@ -731,6 +736,7 @@ void del_gendisk(struct gendisk *disk)
|
||||||
bdev_unhash_inode(disk_devt(disk));
|
bdev_unhash_inode(disk_devt(disk));
|
||||||
set_capacity(disk, 0);
|
set_capacity(disk, 0);
|
||||||
disk->flags &= ~GENHD_FL_UP;
|
disk->flags &= ~GENHD_FL_UP;
|
||||||
|
up_write(&disk->lookup_sem);
|
||||||
|
|
||||||
if (!(disk->flags & GENHD_FL_HIDDEN))
|
if (!(disk->flags & GENHD_FL_HIDDEN))
|
||||||
sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
|
sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
|
||||||
|
@ -816,9 +822,21 @@ struct gendisk *get_gendisk(dev_t devt, int *partno)
|
||||||
spin_unlock_bh(&ext_devt_lock);
|
spin_unlock_bh(&ext_devt_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (disk && unlikely(disk->flags & GENHD_FL_HIDDEN)) {
|
if (!disk)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Synchronize with del_gendisk() to not return disk that is being
|
||||||
|
* destroyed.
|
||||||
|
*/
|
||||||
|
down_read(&disk->lookup_sem);
|
||||||
|
if (unlikely((disk->flags & GENHD_FL_HIDDEN) ||
|
||||||
|
!(disk->flags & GENHD_FL_UP))) {
|
||||||
|
up_read(&disk->lookup_sem);
|
||||||
put_disk_and_module(disk);
|
put_disk_and_module(disk);
|
||||||
disk = NULL;
|
disk = NULL;
|
||||||
|
} else {
|
||||||
|
up_read(&disk->lookup_sem);
|
||||||
}
|
}
|
||||||
return disk;
|
return disk;
|
||||||
}
|
}
|
||||||
|
@ -1418,6 +1436,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
|
||||||
kfree(disk);
|
kfree(disk);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
init_rwsem(&disk->lookup_sem);
|
||||||
disk->node_id = node_id;
|
disk->node_id = node_id;
|
||||||
if (disk_expand_part_tbl(disk, 0)) {
|
if (disk_expand_part_tbl(disk, 0)) {
|
||||||
free_part_stats(&disk->part0);
|
free_part_stats(&disk->part0);
|
||||||
|
|
|
@ -198,6 +198,7 @@ struct gendisk {
|
||||||
void *private_data;
|
void *private_data;
|
||||||
|
|
||||||
int flags;
|
int flags;
|
||||||
|
struct rw_semaphore lookup_sem;
|
||||||
struct kobject *slave_dir;
|
struct kobject *slave_dir;
|
||||||
|
|
||||||
struct timer_rand_state *random;
|
struct timer_rand_state *random;
|
||||||
|
|
Loading…
Reference in New Issue