Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md

Pull MD fixes from Shaohua Li:
 "This fixes several bugs, three of them are marked for stable:

   - an initialization issue fixed by Ming

   - a bio clone race issue fixed by me

   - an async tx flush issue fixed by Ofer

   - other cleanups"

* 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
  MD: fix warnning for UP case
  md/raid5: add thread_group worker async_tx_issue_pending_all
  md: simplify code with bio_io_error
  md/raid1: fix writebehind bio clone
  md: raid1-10: move raid1/raid10 common code into raid1-10.c
  md: raid1/raid10: initialize bvec table via bio_add_page()
  md: remove 'idx' from 'struct resync_pages'
This commit is contained in:
Linus Torvalds 2017-07-28 12:24:21 -07:00
commit 9583f1c99f
6 changed files with 115 additions and 126 deletions

View File

@ -2287,7 +2287,7 @@ static void export_array(struct mddev *mddev)
static bool set_in_sync(struct mddev *mddev) static bool set_in_sync(struct mddev *mddev)
{ {
WARN_ON_ONCE(!spin_is_locked(&mddev->lock)); WARN_ON_ONCE(NR_CPUS != 1 && !spin_is_locked(&mddev->lock));
if (!mddev->in_sync) { if (!mddev->in_sync) {
mddev->sync_checkers++; mddev->sync_checkers++;
spin_unlock(&mddev->lock); spin_unlock(&mddev->lock);

View File

@ -731,58 +731,4 @@ static inline void mddev_check_write_zeroes(struct mddev *mddev, struct bio *bio
!bdev_get_queue(bio->bi_bdev)->limits.max_write_zeroes_sectors) !bdev_get_queue(bio->bi_bdev)->limits.max_write_zeroes_sectors)
mddev->queue->limits.max_write_zeroes_sectors = 0; mddev->queue->limits.max_write_zeroes_sectors = 0;
} }
/*
 * Maximum size of each resync request, in bytes (64 KiB), followed by the
 * number of pages needed to hold one such request: RESYNC_BLOCK_SIZE
 * divided by PAGE_SIZE, rounded up.
 */
#define RESYNC_BLOCK_SIZE (64*1024)
#define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE)
/*
 * for managing resync I/O pages: the page set that backs one resync bio,
 * reachable from that bio through bio->bi_private
 */
struct resync_pages {
/* index of the next page to hand out (callers pass rp->idx++ to resync_fetch_page()) */
unsigned idx; /* for get/put page from the pool */
/* the r1bio/r10bio this page set is currently attached to */
void *raid_bio;
/* the RESYNC_PAGES pages themselves, filled in by resync_alloc_pages() */
struct page *pages[RESYNC_PAGES];
};
/*
 * Fill every slot of rp->pages[] with a page obtained from
 * alloc_page(gfp_flags).
 *
 * Returns 0 once all RESYNC_PAGES slots are populated.  If any allocation
 * fails, the pages obtained so far are released with put_page() (most
 * recent first) and -ENOMEM is returned.
 */
static inline int resync_alloc_pages(struct resync_pages *rp,
				     gfp_t gfp_flags)
{
	int nr_done = 0;

	while (nr_done < RESYNC_PAGES) {
		struct page *pg = alloc_page(gfp_flags);

		rp->pages[nr_done] = pg;
		if (!pg) {
			/* unwind: drop everything allocated before the failure */
			while (nr_done > 0) {
				nr_done--;
				put_page(rp->pages[nr_done]);
			}
			return -ENOMEM;
		}
		nr_done++;
	}

	return 0;
}
/*
 * Call put_page() on each of the RESYNC_PAGES pages held in rp->pages[],
 * in ascending index order.
 */
static inline void resync_free_pages(struct resync_pages *rp)
{
	struct page **slot = rp->pages;
	struct page **end = rp->pages + RESYNC_PAGES;

	for (; slot < end; slot++)
		put_page(*slot);
}
/*
 * Call get_page() on each of the RESYNC_PAGES pages held in rp->pages[],
 * in ascending index order.
 */
static inline void resync_get_all_pages(struct resync_pages *rp)
{
	int slot = 0;

	while (slot < RESYNC_PAGES) {
		get_page(rp->pages[slot]);
		slot++;
	}
}
/*
 * Bounds-checked lookup into rp->pages[].
 *
 * Returns the page stored at @idx, or NULL (after a one-time warning) when
 * @idx is not below RESYNC_PAGES.
 */
static inline struct page *resync_fetch_page(struct resync_pages *rp,
					     unsigned idx)
{
	if (!WARN_ON_ONCE(idx >= RESYNC_PAGES))
		return rp->pages[idx];

	return NULL;
}
#endif /* _MD_MD_H */ #endif /* _MD_MD_H */

81
drivers/md/raid1-10.c Normal file
View File

@ -0,0 +1,81 @@
/*
 * Maximum size of each resync request, in bytes (64 KiB), followed by the
 * number of pages needed to hold one such request: RESYNC_BLOCK_SIZE
 * divided by PAGE_SIZE, rounded up.
 */
#define RESYNC_BLOCK_SIZE (64*1024)
#define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE)
/*
 * for managing resync I/O pages: the page set that backs one resync bio,
 * reachable from that bio through bio->bi_private
 */
struct resync_pages {
/* the r1bio/r10bio this page set is currently attached to */
void *raid_bio;
/* the RESYNC_PAGES pages themselves, filled in by resync_alloc_pages() */
struct page *pages[RESYNC_PAGES];
};
/*
 * Populate rp->pages[] with RESYNC_PAGES pages from alloc_page(gfp_flags).
 *
 * Returns 0 on success.  On the first failed allocation, every page
 * obtained so far is handed back with put_page() (most recent first) and
 * -ENOMEM is returned.
 */
static inline int resync_alloc_pages(struct resync_pages *rp,
				     gfp_t gfp_flags)
{
	int filled = 0;

	while (filled < RESYNC_PAGES) {
		struct page *new_page = alloc_page(gfp_flags);

		rp->pages[filled] = new_page;
		if (!new_page) {
			/* roll back the partial allocation */
			while (filled > 0) {
				filled--;
				put_page(rp->pages[filled]);
			}
			return -ENOMEM;
		}
		filled++;
	}

	return 0;
}
/*
 * Release the page set: put_page() is called once on every entry of
 * rp->pages[], from index 0 up to RESYNC_PAGES - 1.
 */
static inline void resync_free_pages(struct resync_pages *rp)
{
	struct page **cur = rp->pages;
	struct page **last = rp->pages + RESYNC_PAGES;

	for (; cur < last; cur++)
		put_page(*cur);
}
/*
 * get_page() is called once on every entry of rp->pages[], from index 0
 * up to RESYNC_PAGES - 1.
 */
static inline void resync_get_all_pages(struct resync_pages *rp)
{
	int pos = 0;

	while (pos < RESYNC_PAGES) {
		get_page(rp->pages[pos]);
		pos++;
	}
}
/*
 * Return the page at position @idx of rp->pages[].
 *
 * An @idx of RESYNC_PAGES or more raises a one-time warning and yields
 * NULL instead of reading past the array.
 */
static inline struct page *resync_fetch_page(struct resync_pages *rp,
					     unsigned idx)
{
	if (!WARN_ON_ONCE(idx >= RESYNC_PAGES))
		return rp->pages[idx];

	return NULL;
}
/*
 * 'struct resync_pages' holds the actual pages used for the resync IO.
 * It is per-bio and is stored in the bio's .bi_private, so this helper
 * simply hands that pointer back with the right type.
 */
static inline struct resync_pages *get_resync_pages(struct bio *bio)
{
	struct resync_pages *rp = bio->bi_private;

	return rp;
}
/*
 * Set up the bvec table of a resync bio again from its resync_pages.
 * Generally called after bio_reset() for resetting the bvec table.
 *
 * @bio:  bio whose bvec table is refilled via bio_add_page()
 * @rp:   page set the pages are taken from, starting at rp->pages[0]
 * @size: number of bytes the bio should cover
 *
 * Pages are added at offset 0, PAGE_SIZE bytes each (the final one may be
 * shorter), until @size bytes are covered.  At most RESYNC_PAGES pages are
 * added; if @size would need more, the remainder is left uncovered and a
 * one-time warning is raised.
 */
static void md_bio_reset_resync_pages(struct bio *bio, struct resync_pages *rp,
				      int size)
{
	int idx = 0;

	/* initialize bvec table again */
	do {
		struct page *page = resync_fetch_page(rp, idx);
		int len = min_t(int, size, PAGE_SIZE);

		/*
		 * won't fail because the vec table is big
		 * enough to hold all these pages
		 */
		bio_add_page(bio, page, len, 0);
		size -= len;
		/*
		 * Advance idx *before* testing it: with a post-increment the
		 * value RESYNC_PAGES - 1 passes the test and the next round
		 * would ask resync_fetch_page() for a slot past the end of
		 * rp->pages[].
		 */
	} while (++idx < RESYNC_PAGES && size > 0);

	/* ran out of pages before @size was fully covered */
	WARN_ON_ONCE(size > 0);
}

View File

@ -81,14 +81,7 @@ static void lower_barrier(struct r1conf *conf, sector_t sector_nr);
#define raid1_log(md, fmt, args...) \ #define raid1_log(md, fmt, args...) \
do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid1 " fmt, ##args); } while (0) do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid1 " fmt, ##args); } while (0)
/* #include "raid1-10.c"
* 'strct resync_pages' stores actual pages used for doing the resync
* IO, and it is per-bio, so make .bi_private points to it.
*/
static inline struct resync_pages *get_resync_pages(struct bio *bio)
{
	/* the per-bio resync page set is kept in .bi_private */
	struct resync_pages *rp = bio->bi_private;

	return rp;
}
/* /*
* for resync bio, r1bio pointer can be retrieved from the per-bio * for resync bio, r1bio pointer can be retrieved from the per-bio
@ -170,7 +163,6 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
resync_get_all_pages(rp); resync_get_all_pages(rp);
} }
rp->idx = 0;
rp->raid_bio = r1_bio; rp->raid_bio = r1_bio;
bio->bi_private = rp; bio->bi_private = rp;
} }
@ -492,10 +484,6 @@ static void raid1_end_write_request(struct bio *bio)
} }
if (behind) { if (behind) {
/* we release behind master bio when all write are done */
if (r1_bio->behind_master_bio == bio)
to_put = NULL;
if (test_bit(WriteMostly, &rdev->flags)) if (test_bit(WriteMostly, &rdev->flags))
atomic_dec(&r1_bio->behind_remaining); atomic_dec(&r1_bio->behind_remaining);
@ -802,8 +790,7 @@ static void flush_bio_list(struct r1conf *conf, struct bio *bio)
bio->bi_next = NULL; bio->bi_next = NULL;
bio->bi_bdev = rdev->bdev; bio->bi_bdev = rdev->bdev;
if (test_bit(Faulty, &rdev->flags)) { if (test_bit(Faulty, &rdev->flags)) {
bio->bi_status = BLK_STS_IOERR; bio_io_error(bio);
bio_endio(bio);
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
!blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
/* Just ignore it */ /* Just ignore it */
@ -1088,7 +1075,7 @@ static void unfreeze_array(struct r1conf *conf)
wake_up(&conf->wait_barrier); wake_up(&conf->wait_barrier);
} }
static struct bio *alloc_behind_master_bio(struct r1bio *r1_bio, static void alloc_behind_master_bio(struct r1bio *r1_bio,
struct bio *bio) struct bio *bio)
{ {
int size = bio->bi_iter.bi_size; int size = bio->bi_iter.bi_size;
@ -1098,11 +1085,13 @@ static struct bio *alloc_behind_master_bio(struct r1bio *r1_bio,
behind_bio = bio_alloc_mddev(GFP_NOIO, vcnt, r1_bio->mddev); behind_bio = bio_alloc_mddev(GFP_NOIO, vcnt, r1_bio->mddev);
if (!behind_bio) if (!behind_bio)
goto fail; return;
/* discard op, we don't support writezero/writesame yet */ /* discard op, we don't support writezero/writesame yet */
if (!bio_has_data(bio)) if (!bio_has_data(bio)) {
behind_bio->bi_iter.bi_size = size;
goto skip_copy; goto skip_copy;
}
while (i < vcnt && size) { while (i < vcnt && size) {
struct page *page; struct page *page;
@ -1123,14 +1112,13 @@ skip_copy:
r1_bio->behind_master_bio = behind_bio;; r1_bio->behind_master_bio = behind_bio;;
set_bit(R1BIO_BehindIO, &r1_bio->state); set_bit(R1BIO_BehindIO, &r1_bio->state);
return behind_bio; return;
free_pages: free_pages:
pr_debug("%dB behind alloc failed, doing sync I/O\n", pr_debug("%dB behind alloc failed, doing sync I/O\n",
bio->bi_iter.bi_size); bio->bi_iter.bi_size);
bio_free_pages(behind_bio); bio_free_pages(behind_bio);
fail: bio_put(behind_bio);
return behind_bio;
} }
struct raid1_plug_cb { struct raid1_plug_cb {
@ -1483,7 +1471,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
(atomic_read(&bitmap->behind_writes) (atomic_read(&bitmap->behind_writes)
< mddev->bitmap_info.max_write_behind) && < mddev->bitmap_info.max_write_behind) &&
!waitqueue_active(&bitmap->behind_wait)) { !waitqueue_active(&bitmap->behind_wait)) {
mbio = alloc_behind_master_bio(r1_bio, bio); alloc_behind_master_bio(r1_bio, bio);
} }
bitmap_startwrite(bitmap, r1_bio->sector, bitmap_startwrite(bitmap, r1_bio->sector,
@ -1493,14 +1481,11 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
first_clone = 0; first_clone = 0;
} }
if (!mbio) { if (r1_bio->behind_master_bio)
if (r1_bio->behind_master_bio) mbio = bio_clone_fast(r1_bio->behind_master_bio,
mbio = bio_clone_fast(r1_bio->behind_master_bio, GFP_NOIO, mddev->bio_set);
GFP_NOIO, else
mddev->bio_set); mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set);
else
mbio = bio_clone_fast(bio, GFP_NOIO, mddev->bio_set);
}
if (r1_bio->behind_master_bio) { if (r1_bio->behind_master_bio) {
if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags)) if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags))
@ -2086,10 +2071,7 @@ static void process_checks(struct r1bio *r1_bio)
/* Fix variable parts of all bios */ /* Fix variable parts of all bios */
vcnt = (r1_bio->sectors + PAGE_SIZE / 512 - 1) >> (PAGE_SHIFT - 9); vcnt = (r1_bio->sectors + PAGE_SIZE / 512 - 1) >> (PAGE_SHIFT - 9);
for (i = 0; i < conf->raid_disks * 2; i++) { for (i = 0; i < conf->raid_disks * 2; i++) {
int j;
int size;
blk_status_t status; blk_status_t status;
struct bio_vec *bi;
struct bio *b = r1_bio->bios[i]; struct bio *b = r1_bio->bios[i];
struct resync_pages *rp = get_resync_pages(b); struct resync_pages *rp = get_resync_pages(b);
if (b->bi_end_io != end_sync_read) if (b->bi_end_io != end_sync_read)
@ -2098,8 +2080,6 @@ static void process_checks(struct r1bio *r1_bio)
status = b->bi_status; status = b->bi_status;
bio_reset(b); bio_reset(b);
b->bi_status = status; b->bi_status = status;
b->bi_vcnt = vcnt;
b->bi_iter.bi_size = r1_bio->sectors << 9;
b->bi_iter.bi_sector = r1_bio->sector + b->bi_iter.bi_sector = r1_bio->sector +
conf->mirrors[i].rdev->data_offset; conf->mirrors[i].rdev->data_offset;
b->bi_bdev = conf->mirrors[i].rdev->bdev; b->bi_bdev = conf->mirrors[i].rdev->bdev;
@ -2107,15 +2087,8 @@ static void process_checks(struct r1bio *r1_bio)
rp->raid_bio = r1_bio; rp->raid_bio = r1_bio;
b->bi_private = rp; b->bi_private = rp;
size = b->bi_iter.bi_size; /* initialize bvec table again */
bio_for_each_segment_all(bi, b, j) { md_bio_reset_resync_pages(b, rp, r1_bio->sectors << 9);
bi->bv_offset = 0;
if (size > PAGE_SIZE)
bi->bv_len = PAGE_SIZE;
else
bi->bv_len = size;
size -= PAGE_SIZE;
}
} }
for (primary = 0; primary < conf->raid_disks * 2; primary++) for (primary = 0; primary < conf->raid_disks * 2; primary++)
if (r1_bio->bios[primary]->bi_end_io == end_sync_read && if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
@ -2366,8 +2339,6 @@ static int narrow_write_error(struct r1bio *r1_bio, int i)
wbio = bio_clone_fast(r1_bio->behind_master_bio, wbio = bio_clone_fast(r1_bio->behind_master_bio,
GFP_NOIO, GFP_NOIO,
mddev->bio_set); mddev->bio_set);
/* We really need a _all clone */
wbio->bi_iter = (struct bvec_iter){ 0 };
} else { } else {
wbio = bio_clone_fast(r1_bio->master_bio, GFP_NOIO, wbio = bio_clone_fast(r1_bio->master_bio, GFP_NOIO,
mddev->bio_set); mddev->bio_set);
@ -2619,6 +2590,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
int good_sectors = RESYNC_SECTORS; int good_sectors = RESYNC_SECTORS;
int min_bad = 0; /* number of sectors that are bad in all devices */ int min_bad = 0; /* number of sectors that are bad in all devices */
int idx = sector_to_idx(sector_nr); int idx = sector_to_idx(sector_nr);
int page_idx = 0;
if (!conf->r1buf_pool) if (!conf->r1buf_pool)
if (init_resync(conf)) if (init_resync(conf))
@ -2846,7 +2818,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
bio = r1_bio->bios[i]; bio = r1_bio->bios[i];
rp = get_resync_pages(bio); rp = get_resync_pages(bio);
if (bio->bi_end_io) { if (bio->bi_end_io) {
page = resync_fetch_page(rp, rp->idx++); page = resync_fetch_page(rp, page_idx);
/* /*
* won't fail because the vec table is big * won't fail because the vec table is big
@ -2858,7 +2830,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
nr_sectors += len>>9; nr_sectors += len>>9;
sector_nr += len>>9; sector_nr += len>>9;
sync_blocks -= (len>>9); sync_blocks -= (len>>9);
} while (get_resync_pages(r1_bio->bios[disk]->bi_private)->idx < RESYNC_PAGES); } while (++page_idx < RESYNC_PAGES);
r1_bio->sectors = nr_sectors; r1_bio->sectors = nr_sectors;

View File

@ -110,14 +110,7 @@ static void end_reshape(struct r10conf *conf);
#define raid10_log(md, fmt, args...) \ #define raid10_log(md, fmt, args...) \
do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid10 " fmt, ##args); } while (0) do { if ((md)->queue) blk_add_trace_msg((md)->queue, "raid10 " fmt, ##args); } while (0)
/* #include "raid1-10.c"
* 'strct resync_pages' stores actual pages used for doing the resync
* IO, and it is per-bio, so make .bi_private points to it.
*/
static inline struct resync_pages *get_resync_pages(struct bio *bio)
{
	/* the per-bio resync page set is kept in .bi_private */
	struct resync_pages *rp = bio->bi_private;

	return rp;
}
/* /*
* for resync bio, r10bio pointer can be retrieved from the per-bio * for resync bio, r10bio pointer can be retrieved from the per-bio
@ -221,7 +214,6 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
resync_get_all_pages(rp); resync_get_all_pages(rp);
} }
rp->idx = 0;
rp->raid_bio = r10_bio; rp->raid_bio = r10_bio;
bio->bi_private = rp; bio->bi_private = rp;
if (rbio) { if (rbio) {
@ -913,8 +905,7 @@ static void flush_pending_writes(struct r10conf *conf)
bio->bi_next = NULL; bio->bi_next = NULL;
bio->bi_bdev = rdev->bdev; bio->bi_bdev = rdev->bdev;
if (test_bit(Faulty, &rdev->flags)) { if (test_bit(Faulty, &rdev->flags)) {
bio->bi_status = BLK_STS_IOERR; bio_io_error(bio);
bio_endio(bio);
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
!blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
/* Just ignore it */ /* Just ignore it */
@ -1098,8 +1089,7 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
bio->bi_next = NULL; bio->bi_next = NULL;
bio->bi_bdev = rdev->bdev; bio->bi_bdev = rdev->bdev;
if (test_bit(Faulty, &rdev->flags)) { if (test_bit(Faulty, &rdev->flags)) {
bio->bi_status = BLK_STS_IOERR; bio_io_error(bio);
bio_endio(bio);
} else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) && } else if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
!blk_queue_discard(bdev_get_queue(bio->bi_bdev)))) !blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
/* Just ignore it */ /* Just ignore it */
@ -2087,8 +2077,8 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
rp = get_resync_pages(tbio); rp = get_resync_pages(tbio);
bio_reset(tbio); bio_reset(tbio);
tbio->bi_vcnt = vcnt; md_bio_reset_resync_pages(tbio, rp, fbio->bi_iter.bi_size);
tbio->bi_iter.bi_size = fbio->bi_iter.bi_size;
rp->raid_bio = r10_bio; rp->raid_bio = r10_bio;
tbio->bi_private = rp; tbio->bi_private = rp;
tbio->bi_iter.bi_sector = r10_bio->devs[i].addr; tbio->bi_iter.bi_sector = r10_bio->devs[i].addr;
@ -2853,6 +2843,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
sector_t sectors_skipped = 0; sector_t sectors_skipped = 0;
int chunks_skipped = 0; int chunks_skipped = 0;
sector_t chunk_mask = conf->geo.chunk_mask; sector_t chunk_mask = conf->geo.chunk_mask;
int page_idx = 0;
if (!conf->r10buf_pool) if (!conf->r10buf_pool)
if (init_resync(conf)) if (init_resync(conf))
@ -3355,7 +3346,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
break; break;
for (bio= biolist ; bio ; bio=bio->bi_next) { for (bio= biolist ; bio ; bio=bio->bi_next) {
struct resync_pages *rp = get_resync_pages(bio); struct resync_pages *rp = get_resync_pages(bio);
page = resync_fetch_page(rp, rp->idx++); page = resync_fetch_page(rp, page_idx);
/* /*
* won't fail because the vec table is big enough * won't fail because the vec table is big enough
* to hold all these pages * to hold all these pages
@ -3364,7 +3355,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
} }
nr_sectors += len>>9; nr_sectors += len>>9;
sector_nr += len>>9; sector_nr += len>>9;
} while (get_resync_pages(biolist)->idx < RESYNC_PAGES); } while (++page_idx < RESYNC_PAGES);
r10_bio->sectors = nr_sectors; r10_bio->sectors = nr_sectors;
while (biolist) { while (biolist) {

View File

@ -3381,9 +3381,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
sh->dev[i].sector + STRIPE_SECTORS) { sh->dev[i].sector + STRIPE_SECTORS) {
struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector);
bi->bi_status = BLK_STS_IOERR;
md_write_end(conf->mddev); md_write_end(conf->mddev);
bio_endio(bi); bio_io_error(bi);
bi = nextbi; bi = nextbi;
} }
if (bitmap_end) if (bitmap_end)
@ -3403,9 +3402,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
sh->dev[i].sector + STRIPE_SECTORS) { sh->dev[i].sector + STRIPE_SECTORS) {
struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector);
bi->bi_status = BLK_STS_IOERR;
md_write_end(conf->mddev); md_write_end(conf->mddev);
bio_endio(bi); bio_io_error(bi);
bi = bi2; bi = bi2;
} }
@ -3429,8 +3427,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
struct bio *nextbi = struct bio *nextbi =
r5_next_bio(bi, sh->dev[i].sector); r5_next_bio(bi, sh->dev[i].sector);
bi->bi_status = BLK_STS_IOERR; bio_io_error(bi);
bio_endio(bi);
bi = nextbi; bi = nextbi;
} }
} }
@ -6237,6 +6234,8 @@ static void raid5_do_work(struct work_struct *work)
pr_debug("%d stripes handled\n", handled); pr_debug("%d stripes handled\n", handled);
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
async_tx_issue_pending_all();
blk_finish_plug(&plug); blk_finish_plug(&plug);
pr_debug("--- raid5worker inactive\n"); pr_debug("--- raid5worker inactive\n");