Merge branch 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs
Pull btrfs fixes from Chris Mason: "The important part of this pull is Filipe's set of fixes for btrfs device replacement. Filipe fixed a few issues seen on the list and a number he found on his own" * 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs: Btrfs: deal with duplicates during extent_map insertion in btrfs_get_extent Btrfs: fix race between device replace and read repair Btrfs: fix race between device replace and discard Btrfs: fix race between device replace and chunk allocation Btrfs: fix race setting block group back to RW mode during device replace Btrfs: fix unprotected assignment of the left cursor for device replace Btrfs: fix race setting block group readonly during device replace Btrfs: fix race between device replace and block group removal Btrfs: fix race between readahead and device replace/removal
This commit is contained in:
commit
b2d5ad8223
|
@ -2042,6 +2042,11 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
|
||||||
struct btrfs_bio *bbio = NULL;
|
struct btrfs_bio *bbio = NULL;
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Avoid races with device replace and make sure our bbio has devices
|
||||||
|
* associated to its stripes that don't go away while we are discarding.
|
||||||
|
*/
|
||||||
|
btrfs_bio_counter_inc_blocked(root->fs_info);
|
||||||
/* Tell the block device(s) that the sectors can be discarded */
|
/* Tell the block device(s) that the sectors can be discarded */
|
||||||
ret = btrfs_map_block(root->fs_info, REQ_DISCARD,
|
ret = btrfs_map_block(root->fs_info, REQ_DISCARD,
|
||||||
bytenr, &num_bytes, &bbio, 0);
|
bytenr, &num_bytes, &bbio, 0);
|
||||||
|
@ -2074,6 +2079,7 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
|
||||||
}
|
}
|
||||||
btrfs_put_bbio(bbio);
|
btrfs_put_bbio(bbio);
|
||||||
}
|
}
|
||||||
|
btrfs_bio_counter_dec(root->fs_info);
|
||||||
|
|
||||||
if (actual_bytes)
|
if (actual_bytes)
|
||||||
*actual_bytes = discarded_bytes;
|
*actual_bytes = discarded_bytes;
|
||||||
|
|
|
@ -2025,9 +2025,16 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
|
||||||
bio->bi_iter.bi_size = 0;
|
bio->bi_iter.bi_size = 0;
|
||||||
map_length = length;
|
map_length = length;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Avoid races with device replace and make sure our bbio has devices
|
||||||
|
* associated to its stripes that don't go away while we are doing the
|
||||||
|
* read repair operation.
|
||||||
|
*/
|
||||||
|
btrfs_bio_counter_inc_blocked(fs_info);
|
||||||
ret = btrfs_map_block(fs_info, WRITE, logical,
|
ret = btrfs_map_block(fs_info, WRITE, logical,
|
||||||
&map_length, &bbio, mirror_num);
|
&map_length, &bbio, mirror_num);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
|
btrfs_bio_counter_dec(fs_info);
|
||||||
bio_put(bio);
|
bio_put(bio);
|
||||||
return -EIO;
|
return -EIO;
|
||||||
}
|
}
|
||||||
|
@ -2037,6 +2044,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
|
||||||
dev = bbio->stripes[mirror_num-1].dev;
|
dev = bbio->stripes[mirror_num-1].dev;
|
||||||
btrfs_put_bbio(bbio);
|
btrfs_put_bbio(bbio);
|
||||||
if (!dev || !dev->bdev || !dev->writeable) {
|
if (!dev || !dev->bdev || !dev->writeable) {
|
||||||
|
btrfs_bio_counter_dec(fs_info);
|
||||||
bio_put(bio);
|
bio_put(bio);
|
||||||
return -EIO;
|
return -EIO;
|
||||||
}
|
}
|
||||||
|
@ -2045,6 +2053,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
|
||||||
|
|
||||||
if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) {
|
if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) {
|
||||||
/* try to remap that extent elsewhere? */
|
/* try to remap that extent elsewhere? */
|
||||||
|
btrfs_bio_counter_dec(fs_info);
|
||||||
bio_put(bio);
|
bio_put(bio);
|
||||||
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
|
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
|
||||||
return -EIO;
|
return -EIO;
|
||||||
|
@ -2054,6 +2063,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
|
||||||
"read error corrected: ino %llu off %llu (dev %s sector %llu)",
|
"read error corrected: ino %llu off %llu (dev %s sector %llu)",
|
||||||
btrfs_ino(inode), start,
|
btrfs_ino(inode), start,
|
||||||
rcu_str_deref(dev->name), sector);
|
rcu_str_deref(dev->name), sector);
|
||||||
|
btrfs_bio_counter_dec(fs_info);
|
||||||
bio_put(bio);
|
bio_put(bio);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -6979,7 +6979,18 @@ insert:
|
||||||
* existing will always be non-NULL, since there must be
|
* existing will always be non-NULL, since there must be
|
||||||
* extent causing the -EEXIST.
|
* extent causing the -EEXIST.
|
||||||
*/
|
*/
|
||||||
if (start >= extent_map_end(existing) ||
|
if (existing->start == em->start &&
|
||||||
|
extent_map_end(existing) == extent_map_end(em) &&
|
||||||
|
em->block_start == existing->block_start) {
|
||||||
|
/*
|
||||||
|
* these two extents are the same, it happens
|
||||||
|
* with inlines especially
|
||||||
|
*/
|
||||||
|
free_extent_map(em);
|
||||||
|
em = existing;
|
||||||
|
err = 0;
|
||||||
|
|
||||||
|
} else if (start >= extent_map_end(existing) ||
|
||||||
start <= existing->start) {
|
start <= existing->start) {
|
||||||
/*
|
/*
|
||||||
* The existing extent map is the one nearest to
|
* The existing extent map is the one nearest to
|
||||||
|
|
|
@ -718,12 +718,13 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
|
int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
|
||||||
const u64 range_start, const u64 range_len)
|
const u64 range_start, const u64 range_len)
|
||||||
{
|
{
|
||||||
struct btrfs_root *root;
|
struct btrfs_root *root;
|
||||||
struct list_head splice;
|
struct list_head splice;
|
||||||
int done;
|
int done;
|
||||||
|
int total_done = 0;
|
||||||
|
|
||||||
INIT_LIST_HEAD(&splice);
|
INIT_LIST_HEAD(&splice);
|
||||||
|
|
||||||
|
@ -742,6 +743,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
|
||||||
done = btrfs_wait_ordered_extents(root, nr,
|
done = btrfs_wait_ordered_extents(root, nr,
|
||||||
range_start, range_len);
|
range_start, range_len);
|
||||||
btrfs_put_fs_root(root);
|
btrfs_put_fs_root(root);
|
||||||
|
total_done += done;
|
||||||
|
|
||||||
spin_lock(&fs_info->ordered_root_lock);
|
spin_lock(&fs_info->ordered_root_lock);
|
||||||
if (nr != -1) {
|
if (nr != -1) {
|
||||||
|
@ -752,6 +754,8 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
|
||||||
list_splice_tail(&splice, &fs_info->ordered_roots);
|
list_splice_tail(&splice, &fs_info->ordered_roots);
|
||||||
spin_unlock(&fs_info->ordered_root_lock);
|
spin_unlock(&fs_info->ordered_root_lock);
|
||||||
mutex_unlock(&fs_info->ordered_operations_mutex);
|
mutex_unlock(&fs_info->ordered_operations_mutex);
|
||||||
|
|
||||||
|
return total_done;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -199,7 +199,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
|
||||||
u32 *sum, int len);
|
u32 *sum, int len);
|
||||||
int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
|
int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
|
||||||
const u64 range_start, const u64 range_len);
|
const u64 range_start, const u64 range_len);
|
||||||
void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
|
int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
|
||||||
const u64 range_start, const u64 range_len);
|
const u64 range_start, const u64 range_len);
|
||||||
void btrfs_get_logged_extents(struct inode *inode,
|
void btrfs_get_logged_extents(struct inode *inode,
|
||||||
struct list_head *logged_list,
|
struct list_head *logged_list,
|
||||||
|
|
|
@ -761,12 +761,14 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info)
|
||||||
|
|
||||||
do {
|
do {
|
||||||
enqueued = 0;
|
enqueued = 0;
|
||||||
|
mutex_lock(&fs_devices->device_list_mutex);
|
||||||
list_for_each_entry(device, &fs_devices->devices, dev_list) {
|
list_for_each_entry(device, &fs_devices->devices, dev_list) {
|
||||||
if (atomic_read(&device->reada_in_flight) <
|
if (atomic_read(&device->reada_in_flight) <
|
||||||
MAX_IN_FLIGHT)
|
MAX_IN_FLIGHT)
|
||||||
enqueued += reada_start_machine_dev(fs_info,
|
enqueued += reada_start_machine_dev(fs_info,
|
||||||
device);
|
device);
|
||||||
}
|
}
|
||||||
|
mutex_unlock(&fs_devices->device_list_mutex);
|
||||||
total += enqueued;
|
total += enqueued;
|
||||||
} while (enqueued && total < 10000);
|
} while (enqueued && total < 10000);
|
||||||
|
|
||||||
|
|
|
@ -3582,6 +3582,46 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
|
||||||
*/
|
*/
|
||||||
scrub_pause_on(fs_info);
|
scrub_pause_on(fs_info);
|
||||||
ret = btrfs_inc_block_group_ro(root, cache);
|
ret = btrfs_inc_block_group_ro(root, cache);
|
||||||
|
if (!ret && is_dev_replace) {
|
||||||
|
/*
|
||||||
|
* If we are doing a device replace wait for any tasks
|
||||||
|
* that started delalloc right before we set the block
|
||||||
|
* group to RO mode, as they might have just allocated
|
||||||
|
* an extent from it or decided they could do a nocow
|
||||||
|
* write. And if any such tasks did that, wait for their
|
||||||
|
* ordered extents to complete and then commit the
|
||||||
|
* current transaction, so that we can later see the new
|
||||||
|
* extent items in the extent tree - the ordered extents
|
||||||
|
* create delayed data references (for cow writes) when
|
||||||
|
* they complete, which will be run and insert the
|
||||||
|
* corresponding extent items into the extent tree when
|
||||||
|
* we commit the transaction they used when running
|
||||||
|
* inode.c:btrfs_finish_ordered_io(). We later use
|
||||||
|
* the commit root of the extent tree to find extents
|
||||||
|
* to copy from the srcdev into the tgtdev, and we don't
|
||||||
|
* want to miss any new extents.
|
||||||
|
*/
|
||||||
|
btrfs_wait_block_group_reservations(cache);
|
||||||
|
btrfs_wait_nocow_writers(cache);
|
||||||
|
ret = btrfs_wait_ordered_roots(fs_info, -1,
|
||||||
|
cache->key.objectid,
|
||||||
|
cache->key.offset);
|
||||||
|
if (ret > 0) {
|
||||||
|
struct btrfs_trans_handle *trans;
|
||||||
|
|
||||||
|
trans = btrfs_join_transaction(root);
|
||||||
|
if (IS_ERR(trans))
|
||||||
|
ret = PTR_ERR(trans);
|
||||||
|
else
|
||||||
|
ret = btrfs_commit_transaction(trans,
|
||||||
|
root);
|
||||||
|
if (ret) {
|
||||||
|
scrub_pause_off(fs_info);
|
||||||
|
btrfs_put_block_group(cache);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
scrub_pause_off(fs_info);
|
scrub_pause_off(fs_info);
|
||||||
|
|
||||||
if (ret == 0) {
|
if (ret == 0) {
|
||||||
|
@ -3602,9 +3642,11 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
btrfs_dev_replace_lock(&fs_info->dev_replace, 1);
|
||||||
dev_replace->cursor_right = found_key.offset + length;
|
dev_replace->cursor_right = found_key.offset + length;
|
||||||
dev_replace->cursor_left = found_key.offset;
|
dev_replace->cursor_left = found_key.offset;
|
||||||
dev_replace->item_needs_writeback = 1;
|
dev_replace->item_needs_writeback = 1;
|
||||||
|
btrfs_dev_replace_unlock(&fs_info->dev_replace, 1);
|
||||||
ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
|
ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
|
||||||
found_key.offset, cache, is_dev_replace);
|
found_key.offset, cache, is_dev_replace);
|
||||||
|
|
||||||
|
@ -3640,6 +3682,11 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
|
||||||
|
|
||||||
scrub_pause_off(fs_info);
|
scrub_pause_off(fs_info);
|
||||||
|
|
||||||
|
btrfs_dev_replace_lock(&fs_info->dev_replace, 1);
|
||||||
|
dev_replace->cursor_left = dev_replace->cursor_right;
|
||||||
|
dev_replace->item_needs_writeback = 1;
|
||||||
|
btrfs_dev_replace_unlock(&fs_info->dev_replace, 1);
|
||||||
|
|
||||||
if (ro_set)
|
if (ro_set)
|
||||||
btrfs_dec_block_group_ro(root, cache);
|
btrfs_dec_block_group_ro(root, cache);
|
||||||
|
|
||||||
|
@ -3677,9 +3724,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
|
||||||
ret = -ENOMEM;
|
ret = -ENOMEM;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
dev_replace->cursor_left = dev_replace->cursor_right;
|
|
||||||
dev_replace->item_needs_writeback = 1;
|
|
||||||
skip:
|
skip:
|
||||||
key.offset = found_key.offset + length;
|
key.offset = found_key.offset + length;
|
||||||
btrfs_release_path(path);
|
btrfs_release_path(path);
|
||||||
|
|
|
@ -2761,6 +2761,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
|
||||||
u64 dev_extent_len = 0;
|
u64 dev_extent_len = 0;
|
||||||
u64 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
|
u64 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
|
||||||
int i, ret = 0;
|
int i, ret = 0;
|
||||||
|
struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
|
||||||
|
|
||||||
/* Just in case */
|
/* Just in case */
|
||||||
root = root->fs_info->chunk_root;
|
root = root->fs_info->chunk_root;
|
||||||
|
@ -2787,12 +2788,19 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
|
||||||
check_system_chunk(trans, extent_root, map->type);
|
check_system_chunk(trans, extent_root, map->type);
|
||||||
unlock_chunks(root->fs_info->chunk_root);
|
unlock_chunks(root->fs_info->chunk_root);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Take the device list mutex to prevent races with the final phase of
|
||||||
|
* a device replace operation that replaces the device object associated
|
||||||
|
* with map stripes (dev-replace.c:btrfs_dev_replace_finishing()).
|
||||||
|
*/
|
||||||
|
mutex_lock(&fs_devices->device_list_mutex);
|
||||||
for (i = 0; i < map->num_stripes; i++) {
|
for (i = 0; i < map->num_stripes; i++) {
|
||||||
struct btrfs_device *device = map->stripes[i].dev;
|
struct btrfs_device *device = map->stripes[i].dev;
|
||||||
ret = btrfs_free_dev_extent(trans, device,
|
ret = btrfs_free_dev_extent(trans, device,
|
||||||
map->stripes[i].physical,
|
map->stripes[i].physical,
|
||||||
&dev_extent_len);
|
&dev_extent_len);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
|
mutex_unlock(&fs_devices->device_list_mutex);
|
||||||
btrfs_abort_transaction(trans, root, ret);
|
btrfs_abort_transaction(trans, root, ret);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
@ -2811,11 +2819,14 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
|
||||||
if (map->stripes[i].dev) {
|
if (map->stripes[i].dev) {
|
||||||
ret = btrfs_update_device(trans, map->stripes[i].dev);
|
ret = btrfs_update_device(trans, map->stripes[i].dev);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
|
mutex_unlock(&fs_devices->device_list_mutex);
|
||||||
btrfs_abort_transaction(trans, root, ret);
|
btrfs_abort_transaction(trans, root, ret);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
mutex_unlock(&fs_devices->device_list_mutex);
|
||||||
|
|
||||||
ret = btrfs_free_chunk(trans, root, chunk_objectid, chunk_offset);
|
ret = btrfs_free_chunk(trans, root, chunk_objectid, chunk_offset);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
btrfs_abort_transaction(trans, root, ret);
|
btrfs_abort_transaction(trans, root, ret);
|
||||||
|
@ -5762,20 +5773,17 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (found) {
|
if (found) {
|
||||||
if (physical_of_found + map->stripe_len <=
|
struct btrfs_bio_stripe *tgtdev_stripe =
|
||||||
dev_replace->cursor_left) {
|
bbio->stripes + num_stripes;
|
||||||
struct btrfs_bio_stripe *tgtdev_stripe =
|
|
||||||
bbio->stripes + num_stripes;
|
|
||||||
|
|
||||||
tgtdev_stripe->physical = physical_of_found;
|
tgtdev_stripe->physical = physical_of_found;
|
||||||
tgtdev_stripe->length =
|
tgtdev_stripe->length =
|
||||||
bbio->stripes[index_srcdev].length;
|
bbio->stripes[index_srcdev].length;
|
||||||
tgtdev_stripe->dev = dev_replace->tgtdev;
|
tgtdev_stripe->dev = dev_replace->tgtdev;
|
||||||
bbio->tgtdev_map[index_srcdev] = num_stripes;
|
bbio->tgtdev_map[index_srcdev] = num_stripes;
|
||||||
|
|
||||||
tgtdev_indexes++;
|
tgtdev_indexes++;
|
||||||
num_stripes++;
|
num_stripes++;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue