Merge branch 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs

Pull btrfs fixes from Chris Mason:
 "The important part of this pull is Filipe's set of fixes for btrfs
  device replacement.  Filipe fixed a few issues seen on the list and a
  number he found on his own"

* 'for-linus-4.7' of git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs:
  Btrfs: deal with duplicates during extent_map insertion in btrfs_get_extent
  Btrfs: fix race between device replace and read repair
  Btrfs: fix race between device replace and discard
  Btrfs: fix race between device replace and chunk allocation
  Btrfs: fix race setting block group back to RW mode during device replace
  Btrfs: fix unprotected assignment of the left cursor for device replace
  Btrfs: fix race setting block group readonly during device replace
  Btrfs: fix race between device replace and block group removal
  Btrfs: fix race between readahead and device replace/removal
This commit is contained in:
Linus Torvalds 2016-06-04 11:56:28 -07:00
commit b2d5ad8223
8 changed files with 103 additions and 18 deletions

View File

@ -2042,6 +2042,11 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
struct btrfs_bio *bbio = NULL; struct btrfs_bio *bbio = NULL;
/*
* Avoid races with device replace and make sure our bbio has devices
* associated to its stripes that don't go away while we are discarding.
*/
btrfs_bio_counter_inc_blocked(root->fs_info);
/* Tell the block device(s) that the sectors can be discarded */ /* Tell the block device(s) that the sectors can be discarded */
ret = btrfs_map_block(root->fs_info, REQ_DISCARD, ret = btrfs_map_block(root->fs_info, REQ_DISCARD,
bytenr, &num_bytes, &bbio, 0); bytenr, &num_bytes, &bbio, 0);
@ -2074,6 +2079,7 @@ int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
} }
btrfs_put_bbio(bbio); btrfs_put_bbio(bbio);
} }
btrfs_bio_counter_dec(root->fs_info);
if (actual_bytes) if (actual_bytes)
*actual_bytes = discarded_bytes; *actual_bytes = discarded_bytes;

View File

@ -2025,9 +2025,16 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
bio->bi_iter.bi_size = 0; bio->bi_iter.bi_size = 0;
map_length = length; map_length = length;
/*
* Avoid races with device replace and make sure our bbio has devices
* associated to its stripes that don't go away while we are doing the
* read repair operation.
*/
btrfs_bio_counter_inc_blocked(fs_info);
ret = btrfs_map_block(fs_info, WRITE, logical, ret = btrfs_map_block(fs_info, WRITE, logical,
&map_length, &bbio, mirror_num); &map_length, &bbio, mirror_num);
if (ret) { if (ret) {
btrfs_bio_counter_dec(fs_info);
bio_put(bio); bio_put(bio);
return -EIO; return -EIO;
} }
@ -2037,6 +2044,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
dev = bbio->stripes[mirror_num-1].dev; dev = bbio->stripes[mirror_num-1].dev;
btrfs_put_bbio(bbio); btrfs_put_bbio(bbio);
if (!dev || !dev->bdev || !dev->writeable) { if (!dev || !dev->bdev || !dev->writeable) {
btrfs_bio_counter_dec(fs_info);
bio_put(bio); bio_put(bio);
return -EIO; return -EIO;
} }
@ -2045,6 +2053,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) { if (btrfsic_submit_bio_wait(WRITE_SYNC, bio)) {
/* try to remap that extent elsewhere? */ /* try to remap that extent elsewhere? */
btrfs_bio_counter_dec(fs_info);
bio_put(bio); bio_put(bio);
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
return -EIO; return -EIO;
@ -2054,6 +2063,7 @@ int repair_io_failure(struct inode *inode, u64 start, u64 length, u64 logical,
"read error corrected: ino %llu off %llu (dev %s sector %llu)", "read error corrected: ino %llu off %llu (dev %s sector %llu)",
btrfs_ino(inode), start, btrfs_ino(inode), start,
rcu_str_deref(dev->name), sector); rcu_str_deref(dev->name), sector);
btrfs_bio_counter_dec(fs_info);
bio_put(bio); bio_put(bio);
return 0; return 0;
} }

View File

@ -6979,7 +6979,18 @@ insert:
* existing will always be non-NULL, since there must be * existing will always be non-NULL, since there must be
* extent causing the -EEXIST. * extent causing the -EEXIST.
*/ */
if (start >= extent_map_end(existing) || if (existing->start == em->start &&
extent_map_end(existing) == extent_map_end(em) &&
em->block_start == existing->block_start) {
/*
* these two extents are the same, it happens
* with inlines especially
*/
free_extent_map(em);
em = existing;
err = 0;
} else if (start >= extent_map_end(existing) ||
start <= existing->start) { start <= existing->start) {
/* /*
* The existing extent map is the one nearest to * The existing extent map is the one nearest to

View File

@ -718,12 +718,13 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
return count; return count;
} }
void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
const u64 range_start, const u64 range_len) const u64 range_start, const u64 range_len)
{ {
struct btrfs_root *root; struct btrfs_root *root;
struct list_head splice; struct list_head splice;
int done; int done;
int total_done = 0;
INIT_LIST_HEAD(&splice); INIT_LIST_HEAD(&splice);
@ -742,6 +743,7 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
done = btrfs_wait_ordered_extents(root, nr, done = btrfs_wait_ordered_extents(root, nr,
range_start, range_len); range_start, range_len);
btrfs_put_fs_root(root); btrfs_put_fs_root(root);
total_done += done;
spin_lock(&fs_info->ordered_root_lock); spin_lock(&fs_info->ordered_root_lock);
if (nr != -1) { if (nr != -1) {
@ -752,6 +754,8 @@ void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
list_splice_tail(&splice, &fs_info->ordered_roots); list_splice_tail(&splice, &fs_info->ordered_roots);
spin_unlock(&fs_info->ordered_root_lock); spin_unlock(&fs_info->ordered_root_lock);
mutex_unlock(&fs_info->ordered_operations_mutex); mutex_unlock(&fs_info->ordered_operations_mutex);
return total_done;
} }
/* /*

View File

@ -199,7 +199,7 @@ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
u32 *sum, int len); u32 *sum, int len);
int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr, int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr,
const u64 range_start, const u64 range_len); const u64 range_start, const u64 range_len);
void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr, int btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr,
const u64 range_start, const u64 range_len); const u64 range_start, const u64 range_len);
void btrfs_get_logged_extents(struct inode *inode, void btrfs_get_logged_extents(struct inode *inode,
struct list_head *logged_list, struct list_head *logged_list,

View File

@ -761,12 +761,14 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info)
do { do {
enqueued = 0; enqueued = 0;
mutex_lock(&fs_devices->device_list_mutex);
list_for_each_entry(device, &fs_devices->devices, dev_list) { list_for_each_entry(device, &fs_devices->devices, dev_list) {
if (atomic_read(&device->reada_in_flight) < if (atomic_read(&device->reada_in_flight) <
MAX_IN_FLIGHT) MAX_IN_FLIGHT)
enqueued += reada_start_machine_dev(fs_info, enqueued += reada_start_machine_dev(fs_info,
device); device);
} }
mutex_unlock(&fs_devices->device_list_mutex);
total += enqueued; total += enqueued;
} while (enqueued && total < 10000); } while (enqueued && total < 10000);

View File

@ -3582,6 +3582,46 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
*/ */
scrub_pause_on(fs_info); scrub_pause_on(fs_info);
ret = btrfs_inc_block_group_ro(root, cache); ret = btrfs_inc_block_group_ro(root, cache);
if (!ret && is_dev_replace) {
/*
* If we are doing a device replace wait for any tasks
* that started delalloc right before we set the block
* group to RO mode, as they might have just allocated
* an extent from it or decided they could do a nocow
* write. And if any such tasks did that, wait for their
* ordered extents to complete and then commit the
* current transaction, so that we can later see the new
* extent items in the extent tree - the ordered extents
* create delayed data references (for cow writes) when
* they complete, which will be run and insert the
* corresponding extent items into the extent tree when
* we commit the transaction they used when running
* inode.c:btrfs_finish_ordered_io(). We later use
* the commit root of the extent tree to find extents
* to copy from the srcdev into the tgtdev, and we don't
* want to miss any new extents.
*/
btrfs_wait_block_group_reservations(cache);
btrfs_wait_nocow_writers(cache);
ret = btrfs_wait_ordered_roots(fs_info, -1,
cache->key.objectid,
cache->key.offset);
if (ret > 0) {
struct btrfs_trans_handle *trans;
trans = btrfs_join_transaction(root);
if (IS_ERR(trans))
ret = PTR_ERR(trans);
else
ret = btrfs_commit_transaction(trans,
root);
if (ret) {
scrub_pause_off(fs_info);
btrfs_put_block_group(cache);
break;
}
}
}
scrub_pause_off(fs_info); scrub_pause_off(fs_info);
if (ret == 0) { if (ret == 0) {
@ -3602,9 +3642,11 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
break; break;
} }
btrfs_dev_replace_lock(&fs_info->dev_replace, 1);
dev_replace->cursor_right = found_key.offset + length; dev_replace->cursor_right = found_key.offset + length;
dev_replace->cursor_left = found_key.offset; dev_replace->cursor_left = found_key.offset;
dev_replace->item_needs_writeback = 1; dev_replace->item_needs_writeback = 1;
btrfs_dev_replace_unlock(&fs_info->dev_replace, 1);
ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length, ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length,
found_key.offset, cache, is_dev_replace); found_key.offset, cache, is_dev_replace);
@ -3640,6 +3682,11 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
scrub_pause_off(fs_info); scrub_pause_off(fs_info);
btrfs_dev_replace_lock(&fs_info->dev_replace, 1);
dev_replace->cursor_left = dev_replace->cursor_right;
dev_replace->item_needs_writeback = 1;
btrfs_dev_replace_unlock(&fs_info->dev_replace, 1);
if (ro_set) if (ro_set)
btrfs_dec_block_group_ro(root, cache); btrfs_dec_block_group_ro(root, cache);
@ -3677,9 +3724,6 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
ret = -ENOMEM; ret = -ENOMEM;
break; break;
} }
dev_replace->cursor_left = dev_replace->cursor_right;
dev_replace->item_needs_writeback = 1;
skip: skip:
key.offset = found_key.offset + length; key.offset = found_key.offset + length;
btrfs_release_path(path); btrfs_release_path(path);

View File

@ -2761,6 +2761,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
u64 dev_extent_len = 0; u64 dev_extent_len = 0;
u64 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; u64 chunk_objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
int i, ret = 0; int i, ret = 0;
struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
/* Just in case */ /* Just in case */
root = root->fs_info->chunk_root; root = root->fs_info->chunk_root;
@ -2787,12 +2788,19 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
check_system_chunk(trans, extent_root, map->type); check_system_chunk(trans, extent_root, map->type);
unlock_chunks(root->fs_info->chunk_root); unlock_chunks(root->fs_info->chunk_root);
/*
* Take the device list mutex to prevent races with the final phase of
* a device replace operation that replaces the device object associated
* with map stripes (dev-replace.c:btrfs_dev_replace_finishing()).
*/
mutex_lock(&fs_devices->device_list_mutex);
for (i = 0; i < map->num_stripes; i++) { for (i = 0; i < map->num_stripes; i++) {
struct btrfs_device *device = map->stripes[i].dev; struct btrfs_device *device = map->stripes[i].dev;
ret = btrfs_free_dev_extent(trans, device, ret = btrfs_free_dev_extent(trans, device,
map->stripes[i].physical, map->stripes[i].physical,
&dev_extent_len); &dev_extent_len);
if (ret) { if (ret) {
mutex_unlock(&fs_devices->device_list_mutex);
btrfs_abort_transaction(trans, root, ret); btrfs_abort_transaction(trans, root, ret);
goto out; goto out;
} }
@ -2811,11 +2819,14 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
if (map->stripes[i].dev) { if (map->stripes[i].dev) {
ret = btrfs_update_device(trans, map->stripes[i].dev); ret = btrfs_update_device(trans, map->stripes[i].dev);
if (ret) { if (ret) {
mutex_unlock(&fs_devices->device_list_mutex);
btrfs_abort_transaction(trans, root, ret); btrfs_abort_transaction(trans, root, ret);
goto out; goto out;
} }
} }
} }
mutex_unlock(&fs_devices->device_list_mutex);
ret = btrfs_free_chunk(trans, root, chunk_objectid, chunk_offset); ret = btrfs_free_chunk(trans, root, chunk_objectid, chunk_offset);
if (ret) { if (ret) {
btrfs_abort_transaction(trans, root, ret); btrfs_abort_transaction(trans, root, ret);
@ -5762,20 +5773,17 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
} }
} }
if (found) { if (found) {
if (physical_of_found + map->stripe_len <= struct btrfs_bio_stripe *tgtdev_stripe =
dev_replace->cursor_left) { bbio->stripes + num_stripes;
struct btrfs_bio_stripe *tgtdev_stripe =
bbio->stripes + num_stripes;
tgtdev_stripe->physical = physical_of_found; tgtdev_stripe->physical = physical_of_found;
tgtdev_stripe->length = tgtdev_stripe->length =
bbio->stripes[index_srcdev].length; bbio->stripes[index_srcdev].length;
tgtdev_stripe->dev = dev_replace->tgtdev; tgtdev_stripe->dev = dev_replace->tgtdev;
bbio->tgtdev_map[index_srcdev] = num_stripes; bbio->tgtdev_map[index_srcdev] = num_stripes;
tgtdev_indexes++; tgtdev_indexes++;
num_stripes++; num_stripes++;
}
} }
} }