Merge tag 'md/4.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md
Pull MD updates from Shaohua Li:
"This update mainly fixes bugs.
- a raid5 discard related fix from Jes
- a MD multipath bio clone fix from Ming
- raid1 error handling deadlock fix from Nate and corresponding
raid10 fix from myself
- a raid5 stripe batch fix from Neil
- a patch from Sebastian to avoid unnecessary uevent
- several cleanup/debug patches"
* tag 'md/4.6-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md:
md/raid5: Cleanup cpu hotplug notifier
raid10: include bio_end_io_list in nr_queued to prevent freeze_array hang
raid1: include bio_end_io_list in nr_queued to prevent freeze_array hang
md: fix typos for stipe
md/bitmap: remove redundant return in bitmap_checkpage
md/raid1: remove unnecessary BUG_ON
md: multipath: don't hardcopy bio in .make_request path
md/raid5: output stripe state for debug
md/raid5: preserve STRIPE_PREREAD_ACTIVE in break_stripe_batch_list
Update MD git tree URL
md/bitmap: remove redundant check
MD: warn for potential deadlock
md: Drop sending a change uevent when stopping
RAID5: revert e9e4c377e2 to fix a livelock
RAID5: check_reshape() shouldn't call mddev_suspend
md/raid5: Compare apples to apples (or sectors to sectors)
This commit is contained in:
commit
4526b710c1
|
@ -10291,7 +10291,7 @@ F: drivers/media/pci/solo6x10/
|
|||
SOFTWARE RAID (Multiple Disks) SUPPORT
|
||||
M: Shaohua Li <shli@kernel.org>
|
||||
L: linux-raid@vger.kernel.org
|
||||
T: git git://neil.brown.name/md
|
||||
T: git git://git.kernel.org/pub/scm/linux/kernel/git/shli/md.git
|
||||
S: Supported
|
||||
F: drivers/md/
|
||||
F: include/linux/raid/
|
||||
|
|
|
@ -98,7 +98,6 @@ __acquires(bitmap->lock)
|
|||
bitmap->bp[page].hijacked) {
|
||||
/* somebody beat us to getting the page */
|
||||
kfree(mappage);
|
||||
return 0;
|
||||
} else {
|
||||
|
||||
/* no page was in place and we have one, so install it */
|
||||
|
@ -510,8 +509,7 @@ static int bitmap_new_disk_sb(struct bitmap *bitmap)
|
|||
sb->chunksize = cpu_to_le32(chunksize);
|
||||
|
||||
daemon_sleep = bitmap->mddev->bitmap_info.daemon_sleep;
|
||||
if (!daemon_sleep ||
|
||||
(daemon_sleep < 1) || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) {
|
||||
if (!daemon_sleep || (daemon_sleep > MAX_SCHEDULE_TIMEOUT)) {
|
||||
printk(KERN_INFO "Choosing daemon_sleep default (5 sec)\n");
|
||||
daemon_sleep = 5 * HZ;
|
||||
}
|
||||
|
|
|
@ -49,8 +49,8 @@
|
|||
* When we set a bit, or in the counter (to start a write), if the fields is
|
||||
* 0, we first set the disk bit and set the counter to 1.
|
||||
*
|
||||
* If the counter is 0, the on-disk bit is clear and the stipe is clean
|
||||
* Anything that dirties the stipe pushes the counter to 2 (at least)
|
||||
* If the counter is 0, the on-disk bit is clear and the stripe is clean
|
||||
* Anything that dirties the stripe pushes the counter to 2 (at least)
|
||||
* and sets the on-disk bit (lazily).
|
||||
* If a periodic sweep find the counter at 2, it is decremented to 1.
|
||||
* If the sweep find the counter at 1, the on-disk bit is cleared and the
|
||||
|
|
|
@ -305,6 +305,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
|
|||
*/
|
||||
void mddev_suspend(struct mddev *mddev)
|
||||
{
|
||||
WARN_ON_ONCE(current == mddev->thread->tsk);
|
||||
if (mddev->suspended++)
|
||||
return;
|
||||
synchronize_rcu();
|
||||
|
@ -5671,7 +5672,6 @@ static int do_md_stop(struct mddev *mddev, int mode,
|
|||
export_array(mddev);
|
||||
|
||||
md_clean(mddev);
|
||||
kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
|
||||
if (mddev->hold_active == UNTIL_STOP)
|
||||
mddev->hold_active = 0;
|
||||
}
|
||||
|
|
|
@ -129,7 +129,9 @@ static void multipath_make_request(struct mddev *mddev, struct bio * bio)
|
|||
}
|
||||
multipath = conf->multipaths + mp_bh->path;
|
||||
|
||||
mp_bh->bio = *bio;
|
||||
bio_init(&mp_bh->bio);
|
||||
__bio_clone_fast(&mp_bh->bio, bio);
|
||||
|
||||
mp_bh->bio.bi_iter.bi_sector += multipath->rdev->data_offset;
|
||||
mp_bh->bio.bi_bdev = multipath->rdev->bdev;
|
||||
mp_bh->bio.bi_rw |= REQ_FAILFAST_TRANSPORT;
|
||||
|
|
|
@ -2274,6 +2274,7 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
|
|||
if (fail) {
|
||||
spin_lock_irq(&conf->device_lock);
|
||||
list_add(&r1_bio->retry_list, &conf->bio_end_io_list);
|
||||
conf->nr_queued++;
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
md_wakeup_thread(conf->mddev->thread);
|
||||
} else {
|
||||
|
@ -2391,8 +2392,10 @@ static void raid1d(struct md_thread *thread)
|
|||
LIST_HEAD(tmp);
|
||||
spin_lock_irqsave(&conf->device_lock, flags);
|
||||
if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
|
||||
list_add(&tmp, &conf->bio_end_io_list);
|
||||
list_del_init(&conf->bio_end_io_list);
|
||||
while (!list_empty(&conf->bio_end_io_list)) {
|
||||
list_move(conf->bio_end_io_list.prev, &tmp);
|
||||
conf->nr_queued--;
|
||||
}
|
||||
}
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
while (!list_empty(&tmp)) {
|
||||
|
@ -2695,7 +2698,6 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
|
|||
!conf->fullsync &&
|
||||
!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
|
||||
break;
|
||||
BUG_ON(sync_blocks < (PAGE_SIZE>>9));
|
||||
if ((len >> 9) > sync_blocks)
|
||||
len = sync_blocks<<9;
|
||||
}
|
||||
|
|
|
@ -2664,6 +2664,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
|
|||
if (fail) {
|
||||
spin_lock_irq(&conf->device_lock);
|
||||
list_add(&r10_bio->retry_list, &conf->bio_end_io_list);
|
||||
conf->nr_queued++;
|
||||
spin_unlock_irq(&conf->device_lock);
|
||||
md_wakeup_thread(conf->mddev->thread);
|
||||
} else {
|
||||
|
@ -2691,8 +2692,10 @@ static void raid10d(struct md_thread *thread)
|
|||
LIST_HEAD(tmp);
|
||||
spin_lock_irqsave(&conf->device_lock, flags);
|
||||
if (!test_bit(MD_CHANGE_PENDING, &mddev->flags)) {
|
||||
list_add(&tmp, &conf->bio_end_io_list);
|
||||
list_del_init(&conf->bio_end_io_list);
|
||||
while (!list_empty(&conf->bio_end_io_list)) {
|
||||
list_move(conf->bio_end_io_list.prev, &tmp);
|
||||
conf->nr_queued--;
|
||||
}
|
||||
}
|
||||
spin_unlock_irqrestore(&conf->device_lock, flags);
|
||||
while (!list_empty(&tmp)) {
|
||||
|
|
|
@ -340,8 +340,7 @@ static void release_inactive_stripe_list(struct r5conf *conf,
|
|||
int hash)
|
||||
{
|
||||
int size;
|
||||
unsigned long do_wakeup = 0;
|
||||
int i = 0;
|
||||
bool do_wakeup = false;
|
||||
unsigned long flags;
|
||||
|
||||
if (hash == NR_STRIPE_HASH_LOCKS) {
|
||||
|
@ -362,19 +361,15 @@ static void release_inactive_stripe_list(struct r5conf *conf,
|
|||
!list_empty(list))
|
||||
atomic_dec(&conf->empty_inactive_list_nr);
|
||||
list_splice_tail_init(list, conf->inactive_list + hash);
|
||||
do_wakeup |= 1 << hash;
|
||||
do_wakeup = true;
|
||||
spin_unlock_irqrestore(conf->hash_locks + hash, flags);
|
||||
}
|
||||
size--;
|
||||
hash--;
|
||||
}
|
||||
|
||||
for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) {
|
||||
if (do_wakeup & (1 << i))
|
||||
wake_up(&conf->wait_for_stripe[i]);
|
||||
}
|
||||
|
||||
if (do_wakeup) {
|
||||
wake_up(&conf->wait_for_stripe);
|
||||
if (atomic_read(&conf->active_stripes) == 0)
|
||||
wake_up(&conf->wait_for_quiescent);
|
||||
if (conf->retry_read_aligned)
|
||||
|
@ -687,15 +682,14 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
|
|||
if (!sh) {
|
||||
set_bit(R5_INACTIVE_BLOCKED,
|
||||
&conf->cache_state);
|
||||
wait_event_exclusive_cmd(
|
||||
conf->wait_for_stripe[hash],
|
||||
wait_event_lock_irq(
|
||||
conf->wait_for_stripe,
|
||||
!list_empty(conf->inactive_list + hash) &&
|
||||
(atomic_read(&conf->active_stripes)
|
||||
< (conf->max_nr_stripes * 3 / 4)
|
||||
|| !test_bit(R5_INACTIVE_BLOCKED,
|
||||
&conf->cache_state)),
|
||||
spin_unlock_irq(conf->hash_locks + hash),
|
||||
spin_lock_irq(conf->hash_locks + hash));
|
||||
*(conf->hash_locks + hash));
|
||||
clear_bit(R5_INACTIVE_BLOCKED,
|
||||
&conf->cache_state);
|
||||
} else {
|
||||
|
@ -720,9 +714,6 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector,
|
|||
}
|
||||
} while (sh == NULL);
|
||||
|
||||
if (!list_empty(conf->inactive_list + hash))
|
||||
wake_up(&conf->wait_for_stripe[hash]);
|
||||
|
||||
spin_unlock_irq(conf->hash_locks + hash);
|
||||
return sh;
|
||||
}
|
||||
|
@ -2089,6 +2080,14 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors)
|
|||
unsigned long cpu;
|
||||
int err = 0;
|
||||
|
||||
/*
|
||||
* Never shrink. And mddev_suspend() could deadlock if this is called
|
||||
* from raid5d. In that case, scribble_disks and scribble_sectors
|
||||
* should equal to new_disks and new_sectors
|
||||
*/
|
||||
if (conf->scribble_disks >= new_disks &&
|
||||
conf->scribble_sectors >= new_sectors)
|
||||
return 0;
|
||||
mddev_suspend(conf->mddev);
|
||||
get_online_cpus();
|
||||
for_each_present_cpu(cpu) {
|
||||
|
@ -2110,6 +2109,10 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors)
|
|||
}
|
||||
put_online_cpus();
|
||||
mddev_resume(conf->mddev);
|
||||
if (!err) {
|
||||
conf->scribble_disks = new_disks;
|
||||
conf->scribble_sectors = new_sectors;
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
|
@ -2190,7 +2193,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
|
|||
cnt = 0;
|
||||
list_for_each_entry(nsh, &newstripes, lru) {
|
||||
lock_device_hash_lock(conf, hash);
|
||||
wait_event_exclusive_cmd(conf->wait_for_stripe[hash],
|
||||
wait_event_cmd(conf->wait_for_stripe,
|
||||
!list_empty(conf->inactive_list + hash),
|
||||
unlock_device_hash_lock(conf, hash),
|
||||
lock_device_hash_lock(conf, hash));
|
||||
|
@ -4233,10 +4236,9 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
|
|||
|
||||
list_del_init(&sh->batch_list);
|
||||
|
||||
WARN_ON_ONCE(sh->state & ((1 << STRIPE_ACTIVE) |
|
||||
WARN_ONCE(sh->state & ((1 << STRIPE_ACTIVE) |
|
||||
(1 << STRIPE_SYNCING) |
|
||||
(1 << STRIPE_REPLACED) |
|
||||
(1 << STRIPE_PREREAD_ACTIVE) |
|
||||
(1 << STRIPE_DELAYED) |
|
||||
(1 << STRIPE_BIT_DELAY) |
|
||||
(1 << STRIPE_FULL_WRITE) |
|
||||
|
@ -4246,11 +4248,14 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
|
|||
(1 << STRIPE_DISCARD) |
|
||||
(1 << STRIPE_BATCH_READY) |
|
||||
(1 << STRIPE_BATCH_ERR) |
|
||||
(1 << STRIPE_BITMAP_PENDING)));
|
||||
WARN_ON_ONCE(head_sh->state & ((1 << STRIPE_DISCARD) |
|
||||
(1 << STRIPE_REPLACED)));
|
||||
(1 << STRIPE_BITMAP_PENDING)),
|
||||
"stripe state: %lx\n", sh->state);
|
||||
WARN_ONCE(head_sh->state & ((1 << STRIPE_DISCARD) |
|
||||
(1 << STRIPE_REPLACED)),
|
||||
"head stripe state: %lx\n", head_sh->state);
|
||||
|
||||
set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS |
|
||||
(1 << STRIPE_PREREAD_ACTIVE) |
|
||||
(1 << STRIPE_DEGRADED)),
|
||||
head_sh->state & (1 << STRIPE_INSYNC));
|
||||
|
||||
|
@ -6376,6 +6381,8 @@ static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action,
|
|||
break;
|
||||
case CPU_DEAD:
|
||||
case CPU_DEAD_FROZEN:
|
||||
case CPU_UP_CANCELED:
|
||||
case CPU_UP_CANCELED_FROZEN:
|
||||
free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu));
|
||||
break;
|
||||
default:
|
||||
|
@ -6413,6 +6420,12 @@ static int raid5_alloc_percpu(struct r5conf *conf)
|
|||
}
|
||||
put_online_cpus();
|
||||
|
||||
if (!err) {
|
||||
conf->scribble_disks = max(conf->raid_disks,
|
||||
conf->previous_raid_disks);
|
||||
conf->scribble_sectors = max(conf->chunk_sectors,
|
||||
conf->prev_chunk_sectors);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
|
@ -6503,9 +6516,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
|
|||
seqcount_init(&conf->gen_lock);
|
||||
mutex_init(&conf->cache_size_mutex);
|
||||
init_waitqueue_head(&conf->wait_for_quiescent);
|
||||
for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) {
|
||||
init_waitqueue_head(&conf->wait_for_stripe[i]);
|
||||
}
|
||||
init_waitqueue_head(&conf->wait_for_stripe);
|
||||
init_waitqueue_head(&conf->wait_for_overlap);
|
||||
INIT_LIST_HEAD(&conf->handle_list);
|
||||
INIT_LIST_HEAD(&conf->hold_list);
|
||||
|
@ -7014,8 +7025,8 @@ static int raid5_run(struct mddev *mddev)
|
|||
}
|
||||
|
||||
if (discard_supported &&
|
||||
mddev->queue->limits.max_discard_sectors >= stripe &&
|
||||
mddev->queue->limits.discard_granularity >= stripe)
|
||||
mddev->queue->limits.max_discard_sectors >= (stripe >> 9) &&
|
||||
mddev->queue->limits.discard_granularity >= stripe)
|
||||
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
|
||||
mddev->queue);
|
||||
else
|
||||
|
|
|
@ -510,6 +510,8 @@ struct r5conf {
|
|||
* conversions
|
||||
*/
|
||||
} __percpu *percpu;
|
||||
int scribble_disks;
|
||||
int scribble_sectors;
|
||||
#ifdef CONFIG_HOTPLUG_CPU
|
||||
struct notifier_block cpu_notify;
|
||||
#endif
|
||||
|
@ -522,7 +524,7 @@ struct r5conf {
|
|||
atomic_t empty_inactive_list_nr;
|
||||
struct llist_head released_stripes;
|
||||
wait_queue_head_t wait_for_quiescent;
|
||||
wait_queue_head_t wait_for_stripe[NR_STRIPE_HASH_LOCKS];
|
||||
wait_queue_head_t wait_for_stripe;
|
||||
wait_queue_head_t wait_for_overlap;
|
||||
unsigned long cache_state;
|
||||
#define R5_INACTIVE_BLOCKED 1 /* release of inactive stripes blocked,
|
||||
|
|
Loading…
Reference in New Issue