Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md:
  md: allow upper limit for resync/reshape to be set when array is read-only
  md/raid5: Properly remove excess drives after shrinking a raid5/6
  md/raid5: make sure a reshape restarts at the correct address.
  md/raid5: allow new reshape modes to be restarted in the middle.
  md: never advance 'events' counter by more than 1.
  Remove deadlock potential in md_open
This commit is contained in:
commit
d58d2d1ade
|
@ -359,6 +359,7 @@ static mddev_t * mddev_find(dev_t unit)
|
||||||
else
|
else
|
||||||
new->md_minor = MINOR(unit) >> MdpMinorShift;
|
new->md_minor = MINOR(unit) >> MdpMinorShift;
|
||||||
|
|
||||||
|
mutex_init(&new->open_mutex);
|
||||||
mutex_init(&new->reconfig_mutex);
|
mutex_init(&new->reconfig_mutex);
|
||||||
INIT_LIST_HEAD(&new->disks);
|
INIT_LIST_HEAD(&new->disks);
|
||||||
INIT_LIST_HEAD(&new->all_mddevs);
|
INIT_LIST_HEAD(&new->all_mddevs);
|
||||||
|
@ -1974,17 +1975,14 @@ repeat:
|
||||||
/* otherwise we have to go forward and ... */
|
/* otherwise we have to go forward and ... */
|
||||||
mddev->events ++;
|
mddev->events ++;
|
||||||
if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */
|
if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */
|
||||||
/* .. if the array isn't clean, insist on an odd 'events' */
|
/* .. if the array isn't clean, an 'even' event must also go
|
||||||
if ((mddev->events&1)==0) {
|
* to spares. */
|
||||||
mddev->events++;
|
if ((mddev->events&1)==0)
|
||||||
nospares = 0;
|
nospares = 0;
|
||||||
}
|
|
||||||
} else {
|
} else {
|
||||||
/* otherwise insist on an even 'events' (for clean states) */
|
/* otherwise an 'odd' event must go to spares */
|
||||||
if ((mddev->events&1)) {
|
if ((mddev->events&1))
|
||||||
mddev->events++;
|
|
||||||
nospares = 0;
|
nospares = 0;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -3601,6 +3599,7 @@ max_sync_store(mddev_t *mddev, const char *buf, size_t len)
|
||||||
if (max < mddev->resync_min)
|
if (max < mddev->resync_min)
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
if (max < mddev->resync_max &&
|
if (max < mddev->resync_max &&
|
||||||
|
mddev->ro == 0 &&
|
||||||
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
|
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
|
||||||
return -EBUSY;
|
return -EBUSY;
|
||||||
|
|
||||||
|
@ -4304,12 +4303,11 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
|
||||||
struct gendisk *disk = mddev->gendisk;
|
struct gendisk *disk = mddev->gendisk;
|
||||||
mdk_rdev_t *rdev;
|
mdk_rdev_t *rdev;
|
||||||
|
|
||||||
|
mutex_lock(&mddev->open_mutex);
|
||||||
if (atomic_read(&mddev->openers) > is_open) {
|
if (atomic_read(&mddev->openers) > is_open) {
|
||||||
printk("md: %s still in use.\n",mdname(mddev));
|
printk("md: %s still in use.\n",mdname(mddev));
|
||||||
return -EBUSY;
|
err = -EBUSY;
|
||||||
}
|
} else if (mddev->pers) {
|
||||||
|
|
||||||
if (mddev->pers) {
|
|
||||||
|
|
||||||
if (mddev->sync_thread) {
|
if (mddev->sync_thread) {
|
||||||
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||||
|
@ -4367,7 +4365,10 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
|
||||||
set_disk_ro(disk, 1);
|
set_disk_ro(disk, 1);
|
||||||
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||||
}
|
}
|
||||||
|
out:
|
||||||
|
mutex_unlock(&mddev->open_mutex);
|
||||||
|
if (err)
|
||||||
|
return err;
|
||||||
/*
|
/*
|
||||||
* Free resources if final stop
|
* Free resources if final stop
|
||||||
*/
|
*/
|
||||||
|
@ -4433,7 +4434,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
|
||||||
blk_integrity_unregister(disk);
|
blk_integrity_unregister(disk);
|
||||||
md_new_event(mddev);
|
md_new_event(mddev);
|
||||||
sysfs_notify_dirent(mddev->sysfs_state);
|
sysfs_notify_dirent(mddev->sysfs_state);
|
||||||
out:
|
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5518,12 +5518,12 @@ static int md_open(struct block_device *bdev, fmode_t mode)
|
||||||
}
|
}
|
||||||
BUG_ON(mddev != bdev->bd_disk->private_data);
|
BUG_ON(mddev != bdev->bd_disk->private_data);
|
||||||
|
|
||||||
if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1)))
|
if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
err = 0;
|
err = 0;
|
||||||
atomic_inc(&mddev->openers);
|
atomic_inc(&mddev->openers);
|
||||||
mddev_unlock(mddev);
|
mutex_unlock(&mddev->open_mutex);
|
||||||
|
|
||||||
check_disk_change(bdev);
|
check_disk_change(bdev);
|
||||||
out:
|
out:
|
||||||
|
|
|
@ -223,6 +223,16 @@ struct mddev_s
|
||||||
* so we don't loop trying */
|
* so we don't loop trying */
|
||||||
|
|
||||||
int in_sync; /* know to not need resync */
|
int in_sync; /* know to not need resync */
|
||||||
|
/* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so
|
||||||
|
* that we are never stopping an array while it is open.
|
||||||
|
* 'reconfig_mutex' protects all other reconfiguration.
|
||||||
|
* These locks are separate due to conflicting interactions
|
||||||
|
* with bdev->bd_mutex.
|
||||||
|
* Lock ordering is:
|
||||||
|
* reconfig_mutex -> bd_mutex : e.g. do_md_run -> revalidate_disk
|
||||||
|
* bd_mutex -> open_mutex: e.g. __blkdev_get -> md_open
|
||||||
|
*/
|
||||||
|
struct mutex open_mutex;
|
||||||
struct mutex reconfig_mutex;
|
struct mutex reconfig_mutex;
|
||||||
atomic_t active; /* general refcount */
|
atomic_t active; /* general refcount */
|
||||||
atomic_t openers; /* number of active opens */
|
atomic_t openers; /* number of active opens */
|
||||||
|
|
|
@ -3785,7 +3785,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
|
||||||
conf->reshape_progress < raid5_size(mddev, 0, 0)) {
|
conf->reshape_progress < raid5_size(mddev, 0, 0)) {
|
||||||
sector_nr = raid5_size(mddev, 0, 0)
|
sector_nr = raid5_size(mddev, 0, 0)
|
||||||
- conf->reshape_progress;
|
- conf->reshape_progress;
|
||||||
} else if (mddev->delta_disks > 0 &&
|
} else if (mddev->delta_disks >= 0 &&
|
||||||
conf->reshape_progress > 0)
|
conf->reshape_progress > 0)
|
||||||
sector_nr = conf->reshape_progress;
|
sector_nr = conf->reshape_progress;
|
||||||
sector_div(sector_nr, new_data_disks);
|
sector_div(sector_nr, new_data_disks);
|
||||||
|
@ -4509,7 +4509,26 @@ static int run(mddev_t *mddev)
|
||||||
(old_disks-max_degraded));
|
(old_disks-max_degraded));
|
||||||
/* here_old is the first stripe that we might need to read
|
/* here_old is the first stripe that we might need to read
|
||||||
* from */
|
* from */
|
||||||
if (here_new >= here_old) {
|
if (mddev->delta_disks == 0) {
|
||||||
|
/* We cannot be sure it is safe to start an in-place
|
||||||
|
* reshape. It is only safe if user-space is monitoring
|
||||||
|
* and taking constant backups.
|
||||||
|
* mdadm always starts a situation like this in
|
||||||
|
* readonly mode so it can take control before
|
||||||
|
* allowing any writes. So just check for that.
|
||||||
|
*/
|
||||||
|
if ((here_new * mddev->new_chunk_sectors !=
|
||||||
|
here_old * mddev->chunk_sectors) ||
|
||||||
|
mddev->ro == 0) {
|
||||||
|
printk(KERN_ERR "raid5: in-place reshape must be started"
|
||||||
|
" in read-only mode - aborting\n");
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
} else if (mddev->delta_disks < 0
|
||||||
|
? (here_new * mddev->new_chunk_sectors <=
|
||||||
|
here_old * mddev->chunk_sectors)
|
||||||
|
: (here_new * mddev->new_chunk_sectors >=
|
||||||
|
here_old * mddev->chunk_sectors)) {
|
||||||
/* Reading from the same stripe as writing to - bad */
|
/* Reading from the same stripe as writing to - bad */
|
||||||
printk(KERN_ERR "raid5: reshape_position too early for "
|
printk(KERN_ERR "raid5: reshape_position too early for "
|
||||||
"auto-recovery - aborting.\n");
|
"auto-recovery - aborting.\n");
|
||||||
|
@ -5078,8 +5097,15 @@ static void raid5_finish_reshape(mddev_t *mddev)
|
||||||
mddev->degraded--;
|
mddev->degraded--;
|
||||||
for (d = conf->raid_disks ;
|
for (d = conf->raid_disks ;
|
||||||
d < conf->raid_disks - mddev->delta_disks;
|
d < conf->raid_disks - mddev->delta_disks;
|
||||||
d++)
|
d++) {
|
||||||
raid5_remove_disk(mddev, d);
|
mdk_rdev_t *rdev = conf->disks[d].rdev;
|
||||||
|
if (rdev && raid5_remove_disk(mddev, d) == 0) {
|
||||||
|
char nm[20];
|
||||||
|
sprintf(nm, "rd%d", rdev->raid_disk);
|
||||||
|
sysfs_remove_link(&mddev->kobj, nm);
|
||||||
|
rdev->raid_disk = -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
mddev->layout = conf->algorithm;
|
mddev->layout = conf->algorithm;
|
||||||
mddev->chunk_sectors = conf->chunk_sectors;
|
mddev->chunk_sectors = conf->chunk_sectors;
|
||||||
|
|
Loading…
Reference in New Issue