Merge tag 'md/4.8-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md

Pull MD fixes from Shaohua Li:
 "This includes several bug fixes:

   - Alexey Obitotskiy fixed a hang for faulty raid5 array with external
     management

   - Song Liu fixed two raid5 journal related bugs

   - Tomasz Majchrzak fixed a bad block recording issue and an
     accounting issue for raid10

   - ZhengYuan Liu fixed an accounting issue for raid5

   - I fixed a potential race condition and memory leak with DIF/DIX
     enabled

   - other trivial fixes"

* tag 'md/4.8-rc4' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
  raid5: avoid unnecessary bio data set
  raid5: fix memory leak of bio integrity data
  raid10: record correct address of bad block
  md-cluster: fix error return code in join()
  r5cache: set MD_JOURNAL_CLEAN correctly
  md: don't print the same repeated messages about delayed sync operation
  md: remove obsolete ret in md_start_sync
  md: do not count journal as spare in GET_ARRAY_INFO
  md: Prevent IO hold during accessing to faulty raid5 array
  MD: hold mddev lock to change bitmap location
  raid5: fix incorrectly counter of conf->empty_inactive_list_nr
  raid10: increment write counter after bio is split
This commit is contained in:
Linus Torvalds 2016-08-30 11:24:04 -07:00
commit 86a1679860
5 changed files with 107 additions and 57 deletions

View File

@ -2183,19 +2183,29 @@ location_show(struct mddev *mddev, char *page)
static ssize_t static ssize_t
location_store(struct mddev *mddev, const char *buf, size_t len) location_store(struct mddev *mddev, const char *buf, size_t len)
{ {
int rv;
rv = mddev_lock(mddev);
if (rv)
return rv;
if (mddev->pers) { if (mddev->pers) {
if (!mddev->pers->quiesce) if (!mddev->pers->quiesce) {
return -EBUSY; rv = -EBUSY;
if (mddev->recovery || mddev->sync_thread) goto out;
return -EBUSY; }
if (mddev->recovery || mddev->sync_thread) {
rv = -EBUSY;
goto out;
}
} }
if (mddev->bitmap || mddev->bitmap_info.file || if (mddev->bitmap || mddev->bitmap_info.file ||
mddev->bitmap_info.offset) { mddev->bitmap_info.offset) {
/* bitmap already configured. Only option is to clear it */ /* bitmap already configured. Only option is to clear it */
if (strncmp(buf, "none", 4) != 0) if (strncmp(buf, "none", 4) != 0) {
return -EBUSY; rv = -EBUSY;
goto out;
}
if (mddev->pers) { if (mddev->pers) {
mddev->pers->quiesce(mddev, 1); mddev->pers->quiesce(mddev, 1);
bitmap_destroy(mddev); bitmap_destroy(mddev);
@ -2214,21 +2224,25 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
/* nothing to be done */; /* nothing to be done */;
else if (strncmp(buf, "file:", 5) == 0) { else if (strncmp(buf, "file:", 5) == 0) {
/* Not supported yet */ /* Not supported yet */
return -EINVAL; rv = -EINVAL;
goto out;
} else { } else {
int rv;
if (buf[0] == '+') if (buf[0] == '+')
rv = kstrtoll(buf+1, 10, &offset); rv = kstrtoll(buf+1, 10, &offset);
else else
rv = kstrtoll(buf, 10, &offset); rv = kstrtoll(buf, 10, &offset);
if (rv) if (rv)
return rv; goto out;
if (offset == 0) if (offset == 0) {
return -EINVAL; rv = -EINVAL;
goto out;
}
if (mddev->bitmap_info.external == 0 && if (mddev->bitmap_info.external == 0 &&
mddev->major_version == 0 && mddev->major_version == 0 &&
offset != mddev->bitmap_info.default_offset) offset != mddev->bitmap_info.default_offset) {
return -EINVAL; rv = -EINVAL;
goto out;
}
mddev->bitmap_info.offset = offset; mddev->bitmap_info.offset = offset;
if (mddev->pers) { if (mddev->pers) {
struct bitmap *bitmap; struct bitmap *bitmap;
@ -2245,7 +2259,7 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
mddev->pers->quiesce(mddev, 0); mddev->pers->quiesce(mddev, 0);
if (rv) { if (rv) {
bitmap_destroy(mddev); bitmap_destroy(mddev);
return rv; goto out;
} }
} }
} }
@ -2257,6 +2271,11 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
set_bit(MD_CHANGE_DEVS, &mddev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags);
md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->thread);
} }
rv = 0;
out:
mddev_unlock(mddev);
if (rv)
return rv;
return len; return len;
} }

View File

@ -834,8 +834,10 @@ static int join(struct mddev *mddev, int nodes)
goto err; goto err;
} }
cinfo->ack_lockres = lockres_init(mddev, "ack", ack_bast, 0); cinfo->ack_lockres = lockres_init(mddev, "ack", ack_bast, 0);
if (!cinfo->ack_lockres) if (!cinfo->ack_lockres) {
ret = -ENOMEM;
goto err; goto err;
}
/* get sync CR lock on ACK. */ /* get sync CR lock on ACK. */
if (dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR)) if (dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR))
pr_err("md-cluster: failed to get a sync CR lock on ACK!(%d)\n", pr_err("md-cluster: failed to get a sync CR lock on ACK!(%d)\n",
@ -849,8 +851,10 @@ static int join(struct mddev *mddev, int nodes)
pr_info("md-cluster: Joined cluster %s slot %d\n", str, cinfo->slot_number); pr_info("md-cluster: Joined cluster %s slot %d\n", str, cinfo->slot_number);
snprintf(str, 64, "bitmap%04d", cinfo->slot_number - 1); snprintf(str, 64, "bitmap%04d", cinfo->slot_number - 1);
cinfo->bitmap_lockres = lockres_init(mddev, str, NULL, 1); cinfo->bitmap_lockres = lockres_init(mddev, str, NULL, 1);
if (!cinfo->bitmap_lockres) if (!cinfo->bitmap_lockres) {
ret = -ENOMEM;
goto err; goto err;
}
if (dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW)) { if (dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW)) {
pr_err("Failed to get bitmap lock\n"); pr_err("Failed to get bitmap lock\n");
ret = -EINVAL; ret = -EINVAL;
@ -858,8 +862,10 @@ static int join(struct mddev *mddev, int nodes)
} }
cinfo->resync_lockres = lockres_init(mddev, "resync", NULL, 0); cinfo->resync_lockres = lockres_init(mddev, "resync", NULL, 0);
if (!cinfo->resync_lockres) if (!cinfo->resync_lockres) {
ret = -ENOMEM;
goto err; goto err;
}
return 0; return 0;
err: err:

View File

@ -1604,11 +1604,8 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
mddev->new_chunk_sectors = mddev->chunk_sectors; mddev->new_chunk_sectors = mddev->chunk_sectors;
} }
if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL) { if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL)
set_bit(MD_HAS_JOURNAL, &mddev->flags); set_bit(MD_HAS_JOURNAL, &mddev->flags);
if (mddev->recovery_cp == MaxSector)
set_bit(MD_JOURNAL_CLEAN, &mddev->flags);
}
} else if (mddev->pers == NULL) { } else if (mddev->pers == NULL) {
/* Insist of good event counter while assembling, except for /* Insist of good event counter while assembling, except for
* spares (which don't need an event count) */ * spares (which don't need an event count) */
@ -5851,6 +5848,9 @@ static int get_array_info(struct mddev *mddev, void __user *arg)
working++; working++;
if (test_bit(In_sync, &rdev->flags)) if (test_bit(In_sync, &rdev->flags))
insync++; insync++;
else if (test_bit(Journal, &rdev->flags))
/* TODO: add journal count to md_u.h */
;
else else
spare++; spare++;
} }
@ -7862,6 +7862,7 @@ void md_do_sync(struct md_thread *thread)
*/ */
do { do {
int mddev2_minor = -1;
mddev->curr_resync = 2; mddev->curr_resync = 2;
try_again: try_again:
@ -7891,10 +7892,14 @@ void md_do_sync(struct md_thread *thread)
prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE); prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) && if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
mddev2->curr_resync >= mddev->curr_resync) { mddev2->curr_resync >= mddev->curr_resync) {
printk(KERN_INFO "md: delaying %s of %s" if (mddev2_minor != mddev2->md_minor) {
" until %s has finished (they" mddev2_minor = mddev2->md_minor;
" share one or more physical units)\n", printk(KERN_INFO "md: delaying %s of %s"
desc, mdname(mddev), mdname(mddev2)); " until %s has finished (they"
" share one or more physical units)\n",
desc, mdname(mddev),
mdname(mddev2));
}
mddev_put(mddev2); mddev_put(mddev2);
if (signal_pending(current)) if (signal_pending(current))
flush_signals(current); flush_signals(current);
@ -8275,16 +8280,13 @@ no_add:
static void md_start_sync(struct work_struct *ws) static void md_start_sync(struct work_struct *ws)
{ {
struct mddev *mddev = container_of(ws, struct mddev, del_work); struct mddev *mddev = container_of(ws, struct mddev, del_work);
int ret = 0;
mddev->sync_thread = md_register_thread(md_do_sync, mddev->sync_thread = md_register_thread(md_do_sync,
mddev, mddev,
"resync"); "resync");
if (!mddev->sync_thread) { if (!mddev->sync_thread) {
if (!(mddev_is_clustered(mddev) && ret == -EAGAIN)) printk(KERN_ERR "%s: could not start resync thread...\n",
printk(KERN_ERR "%s: could not start resync" mdname(mddev));
" thread...\n",
mdname(mddev));
/* leave the spares where they are, it shouldn't hurt */ /* leave the spares where they are, it shouldn't hurt */
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);

View File

@ -1064,6 +1064,8 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
int max_sectors; int max_sectors;
int sectors; int sectors;
md_write_start(mddev, bio);
/* /*
* Register the new request and wait if the reconstruction * Register the new request and wait if the reconstruction
* thread has put up a bar for new requests. * thread has put up a bar for new requests.
@ -1445,8 +1447,6 @@ static void raid10_make_request(struct mddev *mddev, struct bio *bio)
return; return;
} }
md_write_start(mddev, bio);
do { do {
/* /*
@ -2465,20 +2465,21 @@ static int narrow_write_error(struct r10bio *r10_bio, int i)
while (sect_to_write) { while (sect_to_write) {
struct bio *wbio; struct bio *wbio;
sector_t wsector;
if (sectors > sect_to_write) if (sectors > sect_to_write)
sectors = sect_to_write; sectors = sect_to_write;
/* Write at 'sector' for 'sectors' */ /* Write at 'sector' for 'sectors' */
wbio = bio_clone_mddev(bio, GFP_NOIO, mddev); wbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
bio_trim(wbio, sector - bio->bi_iter.bi_sector, sectors); bio_trim(wbio, sector - bio->bi_iter.bi_sector, sectors);
wbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+ wsector = r10_bio->devs[i].addr + (sector - r10_bio->sector);
choose_data_offset(r10_bio, rdev) + wbio->bi_iter.bi_sector = wsector +
(sector - r10_bio->sector)); choose_data_offset(r10_bio, rdev);
wbio->bi_bdev = rdev->bdev; wbio->bi_bdev = rdev->bdev;
bio_set_op_attrs(wbio, REQ_OP_WRITE, 0); bio_set_op_attrs(wbio, REQ_OP_WRITE, 0);
if (submit_bio_wait(wbio) < 0) if (submit_bio_wait(wbio) < 0)
/* Failure! */ /* Failure! */
ok = rdev_set_badblocks(rdev, sector, ok = rdev_set_badblocks(rdev, wsector,
sectors, 0) sectors, 0)
&& ok; && ok;

View File

@ -659,6 +659,7 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
{ {
struct stripe_head *sh; struct stripe_head *sh;
int hash = stripe_hash_locks_hash(sector); int hash = stripe_hash_locks_hash(sector);
int inc_empty_inactive_list_flag;
pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector); pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector);
@ -703,7 +704,12 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
atomic_inc(&conf->active_stripes); atomic_inc(&conf->active_stripes);
BUG_ON(list_empty(&sh->lru) && BUG_ON(list_empty(&sh->lru) &&
!test_bit(STRIPE_EXPANDING, &sh->state)); !test_bit(STRIPE_EXPANDING, &sh->state));
inc_empty_inactive_list_flag = 0;
if (!list_empty(conf->inactive_list + hash))
inc_empty_inactive_list_flag = 1;
list_del_init(&sh->lru); list_del_init(&sh->lru);
if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag)
atomic_inc(&conf->empty_inactive_list_nr);
if (sh->group) { if (sh->group) {
sh->group->stripes_cnt--; sh->group->stripes_cnt--;
sh->group = NULL; sh->group = NULL;
@ -762,6 +768,7 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh
sector_t head_sector, tmp_sec; sector_t head_sector, tmp_sec;
int hash; int hash;
int dd_idx; int dd_idx;
int inc_empty_inactive_list_flag;
/* Don't cross chunks, so stripe pd_idx/qd_idx is the same */ /* Don't cross chunks, so stripe pd_idx/qd_idx is the same */
tmp_sec = sh->sector; tmp_sec = sh->sector;
@ -779,7 +786,12 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh
atomic_inc(&conf->active_stripes); atomic_inc(&conf->active_stripes);
BUG_ON(list_empty(&head->lru) && BUG_ON(list_empty(&head->lru) &&
!test_bit(STRIPE_EXPANDING, &head->state)); !test_bit(STRIPE_EXPANDING, &head->state));
inc_empty_inactive_list_flag = 0;
if (!list_empty(conf->inactive_list + hash))
inc_empty_inactive_list_flag = 1;
list_del_init(&head->lru); list_del_init(&head->lru);
if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag)
atomic_inc(&conf->empty_inactive_list_nr);
if (head->group) { if (head->group) {
head->group->stripes_cnt--; head->group->stripes_cnt--;
head->group = NULL; head->group = NULL;
@ -993,7 +1005,6 @@ again:
set_bit(STRIPE_IO_STARTED, &sh->state); set_bit(STRIPE_IO_STARTED, &sh->state);
bio_reset(bi);
bi->bi_bdev = rdev->bdev; bi->bi_bdev = rdev->bdev;
bio_set_op_attrs(bi, op, op_flags); bio_set_op_attrs(bi, op, op_flags);
bi->bi_end_io = op_is_write(op) bi->bi_end_io = op_is_write(op)
@ -1045,7 +1056,6 @@ again:
set_bit(STRIPE_IO_STARTED, &sh->state); set_bit(STRIPE_IO_STARTED, &sh->state);
bio_reset(rbi);
rbi->bi_bdev = rrdev->bdev; rbi->bi_bdev = rrdev->bdev;
bio_set_op_attrs(rbi, op, op_flags); bio_set_op_attrs(rbi, op, op_flags);
BUG_ON(!op_is_write(op)); BUG_ON(!op_is_write(op));
@ -1978,9 +1988,11 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request)
put_cpu(); put_cpu();
} }
static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp) static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp,
int disks)
{ {
struct stripe_head *sh; struct stripe_head *sh;
int i;
sh = kmem_cache_zalloc(sc, gfp); sh = kmem_cache_zalloc(sc, gfp);
if (sh) { if (sh) {
@ -1989,6 +2001,17 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp)
INIT_LIST_HEAD(&sh->batch_list); INIT_LIST_HEAD(&sh->batch_list);
INIT_LIST_HEAD(&sh->lru); INIT_LIST_HEAD(&sh->lru);
atomic_set(&sh->count, 1); atomic_set(&sh->count, 1);
for (i = 0; i < disks; i++) {
struct r5dev *dev = &sh->dev[i];
bio_init(&dev->req);
dev->req.bi_io_vec = &dev->vec;
dev->req.bi_max_vecs = 1;
bio_init(&dev->rreq);
dev->rreq.bi_io_vec = &dev->rvec;
dev->rreq.bi_max_vecs = 1;
}
} }
return sh; return sh;
} }
@ -1996,7 +2019,7 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp)
{ {
struct stripe_head *sh; struct stripe_head *sh;
sh = alloc_stripe(conf->slab_cache, gfp); sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size);
if (!sh) if (!sh)
return 0; return 0;
@ -2167,7 +2190,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
mutex_lock(&conf->cache_size_mutex); mutex_lock(&conf->cache_size_mutex);
for (i = conf->max_nr_stripes; i; i--) { for (i = conf->max_nr_stripes; i; i--) {
nsh = alloc_stripe(sc, GFP_KERNEL); nsh = alloc_stripe(sc, GFP_KERNEL, newsize);
if (!nsh) if (!nsh)
break; break;
@ -2299,6 +2322,7 @@ static void raid5_end_read_request(struct bio * bi)
(unsigned long long)sh->sector, i, atomic_read(&sh->count), (unsigned long long)sh->sector, i, atomic_read(&sh->count),
bi->bi_error); bi->bi_error);
if (i == disks) { if (i == disks) {
bio_reset(bi);
BUG(); BUG();
return; return;
} }
@ -2402,6 +2426,7 @@ static void raid5_end_read_request(struct bio * bi)
clear_bit(R5_LOCKED, &sh->dev[i].flags); clear_bit(R5_LOCKED, &sh->dev[i].flags);
set_bit(STRIPE_HANDLE, &sh->state); set_bit(STRIPE_HANDLE, &sh->state);
raid5_release_stripe(sh); raid5_release_stripe(sh);
bio_reset(bi);
} }
static void raid5_end_write_request(struct bio *bi) static void raid5_end_write_request(struct bio *bi)
@ -2436,6 +2461,7 @@ static void raid5_end_write_request(struct bio *bi)
(unsigned long long)sh->sector, i, atomic_read(&sh->count), (unsigned long long)sh->sector, i, atomic_read(&sh->count),
bi->bi_error); bi->bi_error);
if (i == disks) { if (i == disks) {
bio_reset(bi);
BUG(); BUG();
return; return;
} }
@ -2479,22 +2505,13 @@ static void raid5_end_write_request(struct bio *bi)
if (sh->batch_head && sh != sh->batch_head) if (sh->batch_head && sh != sh->batch_head)
raid5_release_stripe(sh->batch_head); raid5_release_stripe(sh->batch_head);
bio_reset(bi);
} }
static void raid5_build_block(struct stripe_head *sh, int i, int previous) static void raid5_build_block(struct stripe_head *sh, int i, int previous)
{ {
struct r5dev *dev = &sh->dev[i]; struct r5dev *dev = &sh->dev[i];
bio_init(&dev->req);
dev->req.bi_io_vec = &dev->vec;
dev->req.bi_max_vecs = 1;
dev->req.bi_private = sh;
bio_init(&dev->rreq);
dev->rreq.bi_io_vec = &dev->rvec;
dev->rreq.bi_max_vecs = 1;
dev->rreq.bi_private = sh;
dev->flags = 0; dev->flags = 0;
dev->sector = raid5_compute_blocknr(sh, i, previous); dev->sector = raid5_compute_blocknr(sh, i, previous);
} }
@ -4628,7 +4645,9 @@ finish:
} }
if (!bio_list_empty(&s.return_bi)) { if (!bio_list_empty(&s.return_bi)) {
if (test_bit(MD_CHANGE_PENDING, &conf->mddev->flags)) { if (test_bit(MD_CHANGE_PENDING, &conf->mddev->flags) &&
(s.failed <= conf->max_degraded ||
conf->mddev->external == 0)) {
spin_lock_irq(&conf->device_lock); spin_lock_irq(&conf->device_lock);
bio_list_merge(&conf->return_bi, &s.return_bi); bio_list_merge(&conf->return_bi, &s.return_bi);
spin_unlock_irq(&conf->device_lock); spin_unlock_irq(&conf->device_lock);
@ -6826,11 +6845,14 @@ static int raid5_run(struct mddev *mddev)
if (IS_ERR(conf)) if (IS_ERR(conf))
return PTR_ERR(conf); return PTR_ERR(conf);
if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !journal_dev) { if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) {
printk(KERN_ERR "md/raid:%s: journal disk is missing, force array readonly\n", if (!journal_dev) {
mdname(mddev)); pr_err("md/raid:%s: journal disk is missing, force array readonly\n",
mddev->ro = 1; mdname(mddev));
set_disk_ro(mddev->gendisk, 1); mddev->ro = 1;
set_disk_ro(mddev->gendisk, 1);
} else if (mddev->recovery_cp == MaxSector)
set_bit(MD_JOURNAL_CLEAN, &mddev->flags);
} }
conf->min_offset_diff = min_offset_diff; conf->min_offset_diff = min_offset_diff;