Merge branch 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs fixes from Chris Mason: "The important part of this pull is Filipe's set of fixes for btrfs device replacement. Filipe fixed a few issues seen on the list and a number he found on his own." * 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: Btrfs: deal with duplicates during extent_map insertion in btrfs_get_extent Btrfs: fix race between device replace and read repair Btrfs: fix race between device replace and discard Btrfs: fix race between device replace and chunk allocation Btrfs: fix race setting block group back to RW mode during device replace Btrfs: fix unprotected assignment of the left cursor for device replace Btrfs: fix race setting block group readonly during device replace Btrfs: fix race between device replace and block group removal Btrfs: fix race between readahead and device replace/removal
This commit is contained in:
commit
b2d5ad8223
|
@ -2042,6 +2042,11 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
|
|||
struct btrfs_bio *bbio = NULL;
|
||||
|
||||
|
||||
/*
|
||||
* Avoid races with device replace and make sure our bbio has devices
|
||||
* associated to its stripes that don't go away while we are discarding.
|
||||
*/
|
||||
btrfs_bio_counter_inc_blocked(root->fs_info);
|
||||
/* Tell the block device(s) that the sectors can be discarded */
|
||||
ret = btrfs_map_block(root->fs_info, REQ_DISCARD,
|
||||
bytenr, &num_bytes, &bbio, 0);
|
||||
|
@ -2074,6 +2079,7 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
|
|||
}
|
||||
btrfs_put_bbio(bbio);
|
||||
}
|
||||
btrfs_bio_counter_dec(root->fs_info);
|
||||
|
||||
if (actual_bytes)
|
||||
*actual_bytes = discarded_bytes;
|
||||
|
|
|
@ -2025,9 +2025,16 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
|
|||
bio->bi_iter.bi_size = 0;
|
||||
map_length = length;
|
||||
|
||||
/*
|
||||
* Avoid races with device replace and make sure our bbio has devices
|
||||
* associated to its stripes that don't go away while we are doing the
|
||||
* read repair operation.
|
||||
*/
|
||||
btrfs_bio_counter_inc_blocked(fs_info);
|
||||
ret = btrfs_map_block(fs_info, WRITE, logical,
|
||||
&map_length, &bbio, mirror_num);
|
||||
if (ret) {
|
||||
btrfs_bio_counter_dec(fs_info);
|
||||
bio_put(bio);
|
||||
return -EIO;
|
||||
}
|
||||
|
@ -2037,6 +2044,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
|
|||
dev = bbio->stripes[mirror_num-1].dev;
|
||||
btrfs_put_bbio(bbio);
|
||||
if (!dev || !dev->bdev || !dev->writeable) {
|
||||
btrfs_bio_counter_dec(fs_info);
|
||||
bio_put(bio);
|
||||
return -EIO;
|
||||
}
|
||||
|
@ -2045,6 +2053,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
|
|||
|
||||
if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) {
|
||||
/* try to remap that extent elsewhere? */
|
||||
btrfs_bio_counter_dec(fs_info);
|
||||
bio_put(bio);
|
||||
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
|
||||
return -EIO;
|
||||
|
@ -2054,6 +2063,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
|
|||
"read error corrected: ino %llu off %llu (dev %s sector %llu)",
|
||||
btrfs_ino(inode), start,
|
||||
rcu_str_deref(dev->name), sector);
|
||||
btrfs_bio_counter_dec(fs_info);
|
||||
bio_put(bio);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -6979,7 +6979,18 @@ insert:
|
|||
* existing will always be non-NULL, since there must be
|
||||
* an extent causing the -EEXIST.
|
||||
*/
|
||||
if (start >= extent_map_end(existing) ||
|
||||
if (existing->start == em->start &&
|
||||
extent_map_end(existing) == extent_map_end(em) &&
|
||||
em->block_start == existing->block_start) {
|
||||
/*
|
||||
* these two extents are the same, it happens
|
||||
* with inlines especially
|
||||
*/
|
||||
free_extent_map(em);
|
||||
em = existing;
|
||||
err = 0;
|
||||
|
||||
} else if (start >= extent_map_end(existing) ||
|
||||
start <= existing->start) {
|
||||
/*
|
||||
* The existing extent map is the one nearest to
|
||||
|
|
|
@ -718,12 +718,13 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
|
|||
return count;
|
||||
}
|
||||
|
||||
void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
|
||||
int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
|
||||
const u64 range_start, const u64 range_len)
|
||||
{
|
||||
struct btrfs_root *root;
|
||||
struct list_head splice;
|
||||
int done;
|
||||
int total_done = 0;
|
||||
|
||||
INIT_LIST_HEAD(&splice);
|
||||
|
||||
|
@ -742,6 +743,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
|
|||
done = btrfs_wait_ordered_extents(root, nr,
|
||||
range_start, range_len);
|
||||
btrfs_put_fs_root(root);
|
||||
total_done += done;
|
||||
|
||||
spin_lock(&fs_info->ordered_root_lock);
|
||||
if (nr != -1) {
|
||||
|
@ -752,6 +754,8 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
|
|||
list_splice_tail(&splice, &fs_info->ordered_roots);
|
||||
spin_unlock(&fs_info->ordered_root_lock);
|
||||
mutex_unlock(&fs_info->ordered_operations_mutex);
|
||||
|
||||
return total_done;
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
@ -199,7 +199,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
|
|||
u32 *sum, int len);
|
||||
int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
|
||||
const u64 range_start, const u64 range_len);
|
||||
void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
|
||||
int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
|
||||
const u64 range_start, const u64 range_len);
|
||||
void btrfs_get_logged_extents(struct inode *inode,
|
||||
struct list_head *logged_list,
|
||||
|
|
|
@ -761,12 +761,14 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info)
|
|||
|
||||
do {
|
||||
enqueued = 0;
|
||||
mutex_lock(&fs_devices->device_list_mutex);
|
||||
list_for_each_entry(device, &fs_devices->devices, dev_list) {
|
||||
if (atomic_read(&device->reada_in_flight) <
|
||||
MAX_IN_FLIGHT)
|
||||
enqueued += reada_start_machine_dev(fs_info,
|
||||
device);
|
||||
}
|
||||
mutex_unlock(&fs_devices->device_list_mutex);
|
||||
total += enqueued;
|
||||
} while (enqueued && total < 10000);
|
||||
|
||||
|
|
|
@ -3582,6 +3582,46 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
|
|||
*/
|
||||
scrub_pause_on(fs_info);
|
||||
ret = btrfs_inc_block_group_ro(root, cache);
|
||||
if (!ret && is_dev_replace) {
|
||||
/*
|
||||
* If we are doing a device replace wait for any tasks
|
||||
* that started delalloc right before we set the block
|
||||
* group to RO mode, as they might have just allocated
|
||||
* an extent from it or decided they could do a nocow
|
||||
* write. And if any such tasks did that, wait for their
|
||||
* ordered extents to complete and then commit the
|
||||
* current transaction, so that we can later see the new
|
||||
* extent items in the extent tree - the ordered extents
|
||||
* create delayed data references (for cow writes) when
|
||||
* they complete, which will be run and insert the
|
||||
* corresponding extent items into the extent tree when
|
||||
* we commit the transaction they used when running
|
||||
* inode.c:btrfs_finish_ordered_io(). We later use
|
||||
* the commit root of the extent tree to find extents
|
||||
* to copy from the srcdev into the tgtdev, and we don't
|
||||
* want to miss any new extents.
|
||||
*/
|
||||
btrfs_wait_block_group_reservations(cache);
|
||||
btrfs_wait_nocow_writers(cache);
|
||||
ret = btrfs_wait_ordered_roots(fs_info, -1,
|
||||
cache->key.objectid,
|
||||
cache->key.offset);
|
||||
if (ret > 0) {
|
||||
struct btrfs_trans_handle *trans;
|
||||
|
||||
trans = btrfs_join_transaction(root);
|
||||
if (IS_ERR(trans))
|
||||
ret = PTR_ERR(trans);
|
||||
else
|
||||
ret = btrfs_commit_transaction(trans,
|
||||
root);
|
||||
if (ret) {
|
||||
scrub_pause_off(fs_info);
|
||||
btrfs_put_block_group(cache);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
scrub_pause_off(fs_info);
|
||||
|
||||
if (ret == 0) {
|
||||
|
@ -3602,9 +3642,11 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
|
|||
break;
|
||||
}
|
||||
|
||||
btrfs_dev_replace_lock(&fs_info->dev_replace, 1);
|
||||
dev_replace->cursor_right = found_key.offset + length;
|
||||
dev_replace->cursor_left = found_key.offset;
|
||||
dev_replace->item_needs_writeback = 1;
|
||||
btrfs_dev_replace_unlock(&fs_info->dev_replace, 1);
|
||||
ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
|
||||
found_key.offset, cache, is_dev_replace);
|
||||
|
||||
|
@ -3640,6 +3682,11 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
|
|||
|
||||
scrub_pause_off(fs_info);
|
||||
|
||||
btrfs_dev_replace_lock(&fs_info->dev_replace, 1);
|
||||
dev_replace->cursor_left = dev_replace->cursor_right;
|
||||
dev_replace->item_needs_writeback = 1;
|
||||
btrfs_dev_replace_unlock(&fs_info->dev_replace, 1);
|
||||
|
||||
if (ro_set)
|
||||
btrfs_dec_block_group_ro(root, cache);
|
||||
|
||||
|
@ -3677,9 +3724,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
|
|||
ret = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
|
||||
dev_replace->cursor_left = dev_replace->cursor_right;
|
||||
dev_replace->item_needs_writeback = 1;
|
||||
skip:
|
||||
key.offset = found_key.offset + length;
|
||||
btrfs_release_path(path);
|
||||
|
|
|
@ -2761,6 +2761,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
|
|||
u64 dev_extent_len = 0;
|
||||
u64 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
|
||||
int i, ret = 0;
|
||||
struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
|
||||
|
||||
/* Just in case */
|
||||
root = root->fs_info->chunk_root;
|
||||
|
@ -2787,12 +2788,19 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
|
|||
check_system_chunk(trans, extent_root, map->type);
|
||||
unlock_chunks(root->fs_info->chunk_root);
|
||||
|
||||
/*
|
||||
* Take the device list mutex to prevent races with the final phase of
|
||||
* a device replace operation that replaces the device object associated
|
||||
* with map stripes (dev-replace.c:btrfs_dev_replace_finishing()).
|
||||
*/
|
||||
mutex_lock(&fs_devices->device_list_mutex);
|
||||
for (i = 0; i < map->num_stripes; i++) {
|
||||
struct btrfs_device *device = map->stripes[i].dev;
|
||||
ret = btrfs_free_dev_extent(trans, device,
|
||||
map->stripes[i].physical,
|
||||
&dev_extent_len);
|
||||
if (ret) {
|
||||
mutex_unlock(&fs_devices->device_list_mutex);
|
||||
btrfs_abort_transaction(trans, root, ret);
|
||||
goto out;
|
||||
}
|
||||
|
@ -2811,11 +2819,14 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
|
|||
if (map->stripes[i].dev) {
|
||||
ret = btrfs_update_device(trans, map->stripes[i].dev);
|
||||
if (ret) {
|
||||
mutex_unlock(&fs_devices->device_list_mutex);
|
||||
btrfs_abort_transaction(trans, root, ret);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
mutex_unlock(&fs_devices->device_list_mutex);
|
||||
|
||||
ret = btrfs_free_chunk(trans, root, chunk_objectid, chunk_offset);
|
||||
if (ret) {
|
||||
btrfs_abort_transaction(trans, root, ret);
|
||||
|
@ -5762,20 +5773,17 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
|
|||
}
|
||||
}
|
||||
if (found) {
|
||||
if (physical_of_found + map->stripe_len <=
|
||||
dev_replace->cursor_left) {
|
||||
struct btrfs_bio_stripe *tgtdev_stripe =
|
||||
bbio->stripes + num_stripes;
|
||||
struct btrfs_bio_stripe *tgtdev_stripe =
|
||||
bbio->stripes + num_stripes;
|
||||
|
||||
tgtdev_stripe->physical = physical_of_found;
|
||||
tgtdev_stripe->length =
|
||||
bbio->stripes[index_srcdev].length;
|
||||
tgtdev_stripe->dev = dev_replace->tgtdev;
|
||||
bbio->tgtdev_map[index_srcdev] = num_stripes;
|
||||
tgtdev_stripe->physical = physical_of_found;
|
||||
tgtdev_stripe->length =
|
||||
bbio->stripes[index_srcdev].length;
|
||||
tgtdev_stripe->dev = dev_replace->tgtdev;
|
||||
bbio->tgtdev_map[index_srcdev] = num_stripes;
|
||||
|
||||
tgtdev_indexes++;
|
||||
num_stripes++;
|
||||
}
|
||||
tgtdev_indexes++;
|
||||
num_stripes++;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue