btrfs: raid56: switch write path to rmw_rbio()
This includes the following changes:

- Implement new raid_unplug() function

  Now we don't need a workqueue to run the plug, as all we need to do is
  queue an rmw_rbio_work() call, which can be done without sleeping.

- Implement an rmw_rbio_work_locked() helper

  This is for unlock_stripe(), which is already holding the full stripe
  lock.

- Remove all the old functions

  This should already show how complex the old write path was, as we end
  up removing the following functions:

  * rmw_work()
  * validate_rbio_for_rmw()
  * raid56_rmw_end_io_work()
  * raid56_rmw_stripe()
  * full_stripe_write()
  * partial_stripe_write()
  * __raid56_parity_write()
  * run_plug()
  * unplug_work()
  * btrfs_raid_unplug()
  * __raid56_parity_recover()
  * raid_recover_end_io_work()

- Unexport rmw_rbio()

Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
commit 93723095b5 (parent 5eb30ee26f)
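For orientation before the hunks, here is a condensed sketch of the write-path call flow that results from this patch. The function names are the ones appearing in the diff below; the summary itself is a review aid, not text from the kernel sources.

/*
 * Condensed write-path flow after this patch:
 *
 * raid56_parity_write()
 *   - full rbio, or no block plug active:
 *       start_async_work(rbio, rmw_rbio_work);
 *   - plugged partial write:
 *       list_add_tail(&rbio->plug_list, &plug->rbio_list);
 *       raid_unplug() later sorts and merges the list, then queues
 *       start_async_work(rbio, rmw_rbio_work) for each remaining rbio.
 *
 * rmw_rbio_work()         takes the full stripe lock via lock_stripe_add(),
 *                         then runs rmw_rbio().
 * rmw_rbio_work_locked()  queued by unlock_stripe(), which already holds
 *                         the full stripe lock, so it runs rmw_rbio() directly.
 * rmw_rbio()              the single RMW entry point, now static.
 */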
fs/btrfs/raid56.c
@@ -64,9 +64,9 @@ struct sector_ptr {
 	unsigned int uptodate:8;
 };
 
-static int __raid56_parity_recover(struct btrfs_raid_bio *rbio);
 static noinline void finish_rmw(struct btrfs_raid_bio *rbio);
-static void rmw_work(struct work_struct *work);
+static void rmw_rbio_work(struct work_struct *work);
+static void rmw_rbio_work_locked(struct work_struct *work);
 static int fail_bio_stripe(struct btrfs_raid_bio *rbio, struct bio *bio);
 static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed);
 static void index_rbio_pages(struct btrfs_raid_bio *rbio);
@@ -816,7 +816,7 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
 				start_async_work(next, recover_rbio_work_locked);
 			} else if (next->operation == BTRFS_RBIO_WRITE) {
 				steal_rbio(rbio, next);
-				start_async_work(next, rmw_work);
+				start_async_work(next, rmw_rbio_work_locked);
 			} else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) {
 				steal_rbio(rbio, next);
 				start_async_work(next, scrub_parity_work);
@@ -1108,23 +1108,6 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
 	return 0;
 }
 
-/*
- * while we're doing the read/modify/write cycle, we could
- * have errors in reading pages off the disk. This checks
- * for errors and if we're not able to read the page it'll
- * trigger parity reconstruction. The rmw will be finished
- * after we've reconstructed the failed stripes
- */
-static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio)
-{
-	if (rbio->faila >= 0 || rbio->failb >= 0) {
-		BUG_ON(rbio->faila == rbio->real_stripes - 1);
-		__raid56_parity_recover(rbio);
-	} else {
-		finish_rmw(rbio);
-	}
-}
-
 static void index_one_bio(struct btrfs_raid_bio *rbio, struct bio *bio)
 {
 	const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
@@ -1601,31 +1584,6 @@ static void raid56_bio_end_io(struct bio *bio)
 			   &rbio->end_io_work);
 }
 
-/*
- * End io handler for the read phase of the RMW cycle. All the bios here are
- * physical stripe bios we've read from the disk so we can recalculate the
- * parity of the stripe.
- *
- * This will usually kick off finish_rmw once all the bios are read in, but it
- * may trigger parity reconstruction if we had any errors along the way
- */
-static void raid56_rmw_end_io_work(struct work_struct *work)
-{
-	struct btrfs_raid_bio *rbio =
-		container_of(work, struct btrfs_raid_bio, end_io_work);
-
-	if (atomic_read(&rbio->error) > rbio->bioc->max_errors) {
-		rbio_orig_end_io(rbio, BLK_STS_IOERR);
-		return;
-	}
-
-	/*
-	 * This will normally call finish_rmw to start our write but if there
-	 * are any failed stripes we'll reconstruct from parity first.
-	 */
-	validate_rbio_for_rmw(rbio);
-}
-
 static int rmw_assemble_read_bios(struct btrfs_raid_bio *rbio,
 				  struct bio_list *bio_list)
 {
@@ -1686,122 +1644,6 @@ static int alloc_rbio_data_pages(struct btrfs_raid_bio *rbio)
 	return 0;
 }
 
-/*
- * the stripe must be locked by the caller. It will
- * unlock after all the writes are done
- */
-static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
-{
-	int bios_to_read = 0;
-	struct bio_list bio_list;
-	int ret;
-	struct bio *bio;
-
-	bio_list_init(&bio_list);
-
-	ret = alloc_rbio_pages(rbio);
-	if (ret)
-		goto cleanup;
-
-	index_rbio_pages(rbio);
-
-	atomic_set(&rbio->error, 0);
-
-	ret = rmw_assemble_read_bios(rbio, &bio_list);
-	if (ret < 0)
-		goto cleanup;
-
-	bios_to_read = bio_list_size(&bio_list);
-	if (!bios_to_read) {
-		/*
-		 * this can happen if others have merged with
-		 * us, it means there is nothing left to read.
-		 * But if there are missing devices it may not be
-		 * safe to do the full stripe write yet.
-		 */
-		goto finish;
-	}
-
-	/*
-	 * The bioc may be freed once we submit the last bio. Make sure not to
-	 * touch it after that.
-	 */
-	atomic_set(&rbio->stripes_pending, bios_to_read);
-	INIT_WORK(&rbio->end_io_work, raid56_rmw_end_io_work);
-	while ((bio = bio_list_pop(&bio_list))) {
-		bio->bi_end_io = raid56_bio_end_io;
-
-		if (trace_raid56_read_partial_enabled()) {
-			struct raid56_bio_trace_info trace_info = { 0 };
-
-			bio_get_trace_info(rbio, bio, &trace_info);
-			trace_raid56_read_partial(rbio, bio, &trace_info);
-		}
-		submit_bio(bio);
-	}
-	/* the actual write will happen once the reads are done */
-	return 0;
-
-cleanup:
-	rbio_orig_end_io(rbio, BLK_STS_IOERR);
-
-	while ((bio = bio_list_pop(&bio_list)))
-		bio_put(bio);
-
-	return -EIO;
-
-finish:
-	validate_rbio_for_rmw(rbio);
-	return 0;
-}
-
-/*
- * if the upper layers pass in a full stripe, we thank them by only allocating
- * enough pages to hold the parity, and sending it all down quickly.
- */
-static int full_stripe_write(struct btrfs_raid_bio *rbio)
-{
-	int ret;
-
-	ret = alloc_rbio_parity_pages(rbio);
-	if (ret)
-		return ret;
-
-	ret = lock_stripe_add(rbio);
-	if (ret == 0)
-		finish_rmw(rbio);
-	return 0;
-}
-
-/*
- * partial stripe writes get handed over to async helpers.
- * We're really hoping to merge a few more writes into this
- * rbio before calculating new parity
- */
-static int partial_stripe_write(struct btrfs_raid_bio *rbio)
-{
-	int ret;
-
-	ret = lock_stripe_add(rbio);
-	if (ret == 0)
-		start_async_work(rbio, rmw_work);
-	return 0;
-}
-
-/*
- * sometimes while we were reading from the drive to
- * recalculate parity, enough new bios come into create
- * a full stripe. So we do a check here to see if we can
- * go directly to finish_rmw
- */
-static int __raid56_parity_write(struct btrfs_raid_bio *rbio)
-{
-	/* head off into rmw land if we don't have a full stripe */
-	if (!rbio_is_full(rbio))
-		return partial_stripe_write(rbio);
-	return full_stripe_write(rbio);
-}
-
 /*
  * We use plugging call backs to collect full stripes.
  * Any time we get a partial stripe write while plugged
@@ -1836,28 +1678,22 @@ static int plug_cmp(void *priv, const struct list_head *a,
 	return 0;
 }
 
-static void run_plug(struct btrfs_plug_cb *plug)
+static void raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
 {
+	struct btrfs_plug_cb *plug = container_of(cb, struct btrfs_plug_cb, cb);
 	struct btrfs_raid_bio *cur;
 	struct btrfs_raid_bio *last = NULL;
 
 	/*
 	 * sort our plug list then try to merge
 	 * everything we can in hopes of creating full
 	 * stripes.
 	 */
 	list_sort(NULL, &plug->rbio_list, plug_cmp);
 
 	while (!list_empty(&plug->rbio_list)) {
 		cur = list_entry(plug->rbio_list.next,
 				 struct btrfs_raid_bio, plug_list);
 		list_del_init(&cur->plug_list);
 
 		if (rbio_is_full(cur)) {
-			int ret;
-
-			/* we have a full stripe, send it down */
-			ret = full_stripe_write(cur);
-			BUG_ON(ret);
+			/* We have a full stripe, queue it down. */
+			start_async_work(cur, rmw_rbio_work);
 			continue;
 		}
 		if (last) {
@@ -1865,42 +1701,16 @@ static void run_plug(struct btrfs_plug_cb *plug)
 				merge_rbio(last, cur);
 				free_raid_bio(cur);
 				continue;
-
 			}
-			__raid56_parity_write(last);
+			start_async_work(last, rmw_rbio_work);
 		}
 		last = cur;
 	}
-	if (last) {
-		__raid56_parity_write(last);
-	}
+	if (last)
+		start_async_work(last, rmw_rbio_work);
 	kfree(plug);
 }
 
-/*
- * if the unplug comes from schedule, we have to push the
- * work off to a helper thread
- */
-static void unplug_work(struct work_struct *work)
-{
-	struct btrfs_plug_cb *plug;
-	plug = container_of(work, struct btrfs_plug_cb, work);
-	run_plug(plug);
-}
-
-static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
-{
-	struct btrfs_plug_cb *plug;
-	plug = container_of(cb, struct btrfs_plug_cb, cb);
-
-	if (from_schedule) {
-		INIT_WORK(&plug->work, unplug_work);
-		queue_work(plug->info->rmw_workers, &plug->work);
-		return;
-	}
-	run_plug(plug);
-}
-
 /* Add the original bio into rbio->bio_list, and update rbio::dbitmap. */
 static void rbio_add_bio(struct btrfs_raid_bio *rbio, struct bio *orig_bio)
 {
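The from_schedule special case (and with it unplug_work() and the extra workqueue bounce) can go away because queuing the per-rbio work item never sleeps. For reference, start_async_work() in raid56.c is roughly the helper below; the exact body is quoted from memory and is not part of this diff:

static void start_async_work(struct btrfs_raid_bio *rbio, work_func_t work_func)
{
	/* Only initializes and queues the work item; queue_work() does not sleep. */
	INIT_WORK(&rbio->work, work_func);
	queue_work(rbio->bioc->fs_info->rmw_workers, &rbio->work);
}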
@@ -1948,19 +1758,13 @@ void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
 	rbio_add_bio(rbio, bio);
 
 	/*
-	 * don't plug on full rbios, just get them out the door
+	 * Don't plug on full rbios, just get them out the door
 	 * as quickly as we can
 	 */
-	if (rbio_is_full(rbio)) {
-		ret = full_stripe_write(rbio);
-		if (ret) {
-			free_raid_bio(rbio);
-			goto fail;
-		}
-		return;
-	}
+	if (rbio_is_full(rbio))
+		goto queue_rbio;
 
-	cb = blk_check_plugged(btrfs_raid_unplug, fs_info, sizeof(*plug));
+	cb = blk_check_plugged(raid_unplug, fs_info, sizeof(*plug));
 	if (cb) {
 		plug = container_of(cb, struct btrfs_plug_cb, cb);
 		if (!plug->info) {
@@ -1968,13 +1772,14 @@ void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
 			INIT_LIST_HEAD(&plug->rbio_list);
 		}
 		list_add_tail(&rbio->plug_list, &plug->rbio_list);
-	} else {
-		ret = __raid56_parity_write(rbio);
-		if (ret) {
-			free_raid_bio(rbio);
-			goto fail;
-		}
 		return;
 	}
+queue_rbio:
+	/*
+	 * Either we don't have any existing plug, or we're doing a full stripe,
+	 * can queue the rmw work now.
+	 */
+	start_async_work(rbio, rmw_rbio_work);
+
 	return;
 
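With the queue_rbio label, the full-stripe case and the "no plug" case both end up at the same start_async_work(rbio, rmw_rbio_work) call; only plugged partial writes are parked on plug->rbio_list. The behaviour of the block-layer helper is what makes the NULL branch the natural place for immediate queuing; summarized below as a note on its semantics rather than a quote of its implementation:

/*
 * blk_check_plugged(unplug_fn, data, size):
 *   - if the current task has an active blk_plug, return an existing callback
 *     registered for (unplug_fn, data), or allocate a zeroed one of @size
 *     bytes and attach it; unplug_fn runs when the plug is flushed.
 *   - if there is no active plug, return NULL.
 *
 * So the NULL branch queues rmw_rbio_work() immediately, while the plugged
 * branch only parks the rbio on plug->rbio_list for raid_unplug() to handle.
 */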
@@ -2217,21 +2022,6 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
 	}
 }
 
-/*
- * This is called only for stripes we've read from disk to reconstruct the
- * parity.
- */
-static void raid_recover_end_io_work(struct work_struct *work)
-{
-	struct btrfs_raid_bio *rbio =
-		container_of(work, struct btrfs_raid_bio, end_io_work);
-
-	if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
-		rbio_orig_end_io(rbio, BLK_STS_IOERR);
-	else
-		__raid_recover_end_io(rbio);
-}
-
 static int recover_assemble_read_bios(struct btrfs_raid_bio *rbio,
 				      struct bio_list *bio_list)
 {
@@ -2348,79 +2138,6 @@ static void recover_rbio_work_locked(struct work_struct *work)
 	rbio_orig_end_io(rbio, errno_to_blk_status(ret));
 }
 
-/*
- * reads everything we need off the disk to reconstruct
- * the parity. endio handlers trigger final reconstruction
- * when the IO is done.
- *
- * This is used both for reads from the higher layers and for
- * parity construction required to finish a rmw cycle.
- */
-static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
-{
-	int bios_to_read = 0;
-	struct bio_list bio_list;
-	int ret;
-	struct bio *bio;
-
-	bio_list_init(&bio_list);
-
-	ret = alloc_rbio_pages(rbio);
-	if (ret)
-		goto cleanup;
-
-	atomic_set(&rbio->error, 0);
-
-	ret = recover_assemble_read_bios(rbio, &bio_list);
-	if (ret < 0)
-		goto cleanup;
-
-	bios_to_read = bio_list_size(&bio_list);
-	if (!bios_to_read) {
-		/*
-		 * we might have no bios to read just because the pages
-		 * were up to date, or we might have no bios to read because
-		 * the devices were gone.
-		 */
-		if (atomic_read(&rbio->error) <= rbio->bioc->max_errors) {
-			__raid_recover_end_io(rbio);
-			return 0;
-		} else {
-			goto cleanup;
-		}
-	}
-
-	/*
-	 * The bioc may be freed once we submit the last bio. Make sure not to
-	 * touch it after that.
-	 */
-	atomic_set(&rbio->stripes_pending, bios_to_read);
-	INIT_WORK(&rbio->end_io_work, raid_recover_end_io_work);
-	while ((bio = bio_list_pop(&bio_list))) {
-		bio->bi_end_io = raid56_bio_end_io;
-
-		if (trace_raid56_scrub_read_recover_enabled()) {
-			struct raid56_bio_trace_info trace_info = { 0 };
-
-			bio_get_trace_info(rbio, bio, &trace_info);
-			trace_raid56_scrub_read_recover(rbio, bio, &trace_info);
-		}
-		submit_bio(bio);
-	}
-
-	return 0;
-
-cleanup:
-	if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
-	    rbio->operation == BTRFS_RBIO_REBUILD_MISSING)
-		rbio_orig_end_io(rbio, BLK_STS_IOERR);
-
-	while ((bio = bio_list_pop(&bio_list)))
-		bio_put(bio);
-
-	return -EIO;
-}
-
 /*
  * the main entry point for reads from the higher layers. This
  * is really only called when the normal read path had a failure,
|
|||
}
|
||||
}
|
||||
|
||||
int rmw_rbio(struct btrfs_raid_bio *rbio)
|
||||
static int rmw_rbio(struct btrfs_raid_bio *rbio)
|
||||
{
|
||||
struct bio_list bio_list;
|
||||
int sectornr;
|
||||
|
@ -2615,12 +2332,29 @@ write:
|
|||
return ret;
|
||||
}
|
||||
|
||||
static void rmw_work(struct work_struct *work)
|
||||
static void rmw_rbio_work(struct work_struct *work)
|
||||
{
|
||||
struct btrfs_raid_bio *rbio;
|
||||
int ret;
|
||||
|
||||
rbio = container_of(work, struct btrfs_raid_bio, work);
|
||||
raid56_rmw_stripe(rbio);
|
||||
|
||||
ret = lock_stripe_add(rbio);
|
||||
if (ret == 0) {
|
||||
ret = rmw_rbio(rbio);
|
||||
rbio_orig_end_io(rbio, errno_to_blk_status(ret));
|
||||
}
|
||||
}
|
||||
|
||||
static void rmw_rbio_work_locked(struct work_struct *work)
|
||||
{
|
||||
struct btrfs_raid_bio *rbio;
|
||||
int ret;
|
||||
|
||||
rbio = container_of(work, struct btrfs_raid_bio, work);
|
||||
|
||||
ret = rmw_rbio(rbio);
|
||||
rbio_orig_end_io(rbio, errno_to_blk_status(ret));
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
|
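Worth spelling out how ownership is handed off between the two new work helpers; the lock_stripe_add() return convention described here comes from the existing code, not from this diff:

/*
 * rmw_rbio_work():
 *   lock_stripe_add(rbio) == 0  -> this rbio owns the full stripe lock;
 *                                  run rmw_rbio() and complete the original
 *                                  bios via rbio_orig_end_io().
 *   lock_stripe_add(rbio) != 0  -> the rbio was merged with, or queued
 *                                  behind, the current lock holder; nothing
 *                                  to do here, unlock_stripe() will later
 *                                  queue rmw_rbio_work_locked() for it.
 *
 * rmw_rbio_work_locked():
 *   the full stripe lock is already held (unlock_stripe() path), so call
 *   rmw_rbio() directly and complete the original bios.
 */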
fs/btrfs/raid56.h
@@ -185,9 +185,4 @@ void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio);
 int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info);
 void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info);
 
-/*
- * Placeholder definition to avoid warning, will be removed when
- * the full write path is migrated.
- */
-int rmw_rbio(struct btrfs_raid_bio *rbio);
 #endif