md: allow a maximum extent to be set for resyncing

This allows userspace to control resync/reshape progress and synchronise it
with other activities, such as shared access in a SAN, or backing up critical
sections during a tricky reshape.

Writing a number of sectors (which must be a multiple of the chunk size, if
the array has one) to the 'sync_max' sysfs attribute causes a resync to pause
when it reaches that point.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
NeilBrown 2008-02-06 01:39:52 -08:00 committed by Linus Torvalds
parent c303da6d71
commit c620727779
6 changed files with 107 additions and 10 deletions

View File

@ -416,6 +416,16 @@ also have
sectors in total that could need to be processed. The two sectors in total that could need to be processed. The two
numbers are separated by a '/' thus effectively showing one numbers are separated by a '/' thus effectively showing one
value, a fraction of the process that is complete. value, a fraction of the process that is complete.
A 'select' on this attribute will return when resync completes,
when it reaches the current sync_max (below) and possibly at
other times.
sync_max
This is a number of sectors at which point a resync/recovery
process will pause. A numeric value must be a multiple of the
chunk size when the array has one. When a resync is active, the
value can only ever be increased, never decreased. The value of
'max' effectively disables the limit.
sync_speed sync_speed
This shows the current actual speed, in K/sec, of the current This shows the current actual speed, in K/sec, of the current

View File

@ -275,6 +275,7 @@ static mddev_t * mddev_find(dev_t unit)
spin_lock_init(&new->write_lock); spin_lock_init(&new->write_lock);
init_waitqueue_head(&new->sb_wait); init_waitqueue_head(&new->sb_wait);
new->reshape_position = MaxSector; new->reshape_position = MaxSector;
new->resync_max = MaxSector;
new->queue = blk_alloc_queue(GFP_KERNEL); new->queue = blk_alloc_queue(GFP_KERNEL);
if (!new->queue) { if (!new->queue) {
@ -2920,6 +2921,43 @@ sync_completed_show(mddev_t *mddev, char *page)
static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed); static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
/*
 * Read handler for the 'sync_max' sysfs attribute.
 *
 * Formats mddev->resync_max into @page: the literal "max" when no limit is
 * set (resync_max == MaxSector), otherwise the limit as a decimal sector
 * count.  Either form is newline-terminated.  Returns whatever sprintf()
 * returns for the text written.
 */
static ssize_t
max_sync_show(mddev_t *mddev, char *page)
{
	unsigned long long limit = (unsigned long long)mddev->resync_max;

	/* A real limit is configured: report it numerically. */
	if (mddev->resync_max != MaxSector)
		return sprintf(page, "%llu\n", limit);

	return sprintf(page, "max\n");
}
/*
 * Write handler for the 'sync_max' sysfs attribute.
 *
 * @buf either begins with "max" (only the first three characters are
 * compared), which removes the limit, or holds a decimal sector count
 * optionally followed by a newline.
 *
 * Returns @len on success, or:
 *   -EINVAL  if no digits were parsed, if anything other than NUL or '\n'
 *            follows the number, or if the value fails the chunk-alignment
 *            test;
 *   -EBUSY   if the new value is below the current limit while
 *            MD_RECOVERY_RUNNING is set.
 *
 * On success, waiters on mddev->recovery_wait are woken so that a sync
 * thread paused at the old limit can re-check it.
 */
static ssize_t
max_sync_store(mddev_t *mddev, const char *buf, size_t len)
{
	if (strncmp(buf, "max", 3) != 0) {
		char *end;
		unsigned long long limit = simple_strtoull(buf, &end, 10);

		/* Nothing parsed at all. */
		if (end == buf)
			return -EINVAL;
		/* Trailing junk after the digits. */
		if (*end != 0 && *end != '\n')
			return -EINVAL;

		/* The limit may not be lowered while a sync is running. */
		if (limit < mddev->resync_max &&
		    test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
			return -EBUSY;

		/*
		 * Must be a multiple of chunk_size.  The mask test is only a
		 * true "multiple of" check when chunk_size>>9 is a power of
		 * two.
		 */
		if (mddev->chunk_size) {
			sector_t chunk_mask =
				(sector_t)((mddev->chunk_size>>9)-1);

			if (limit & chunk_mask)
				return -EINVAL;
		}

		mddev->resync_max = limit;
	} else {
		/* "max": no limit. */
		mddev->resync_max = MaxSector;
	}

	wake_up(&mddev->recovery_wait);
	return len;
}
/*
 * sysfs entry for the 'sync_max' attribute: readable by everyone, writable
 * by the owner (S_IRUGO|S_IWUSR), served by max_sync_show() and
 * max_sync_store().  Note the name: a separate, similarly named entry
 * md_sync_max is also listed in md_redundancy_attrs[].
 */
static struct md_sysfs_entry md_max_sync =
__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
static ssize_t static ssize_t
suspend_lo_show(mddev_t *mddev, char *page) suspend_lo_show(mddev_t *mddev, char *page)
{ {
@ -3030,6 +3068,7 @@ static struct attribute *md_redundancy_attrs[] = {
&md_sync_max.attr, &md_sync_max.attr,
&md_sync_speed.attr, &md_sync_speed.attr,
&md_sync_completed.attr, &md_sync_completed.attr,
&md_max_sync.attr,
&md_suspend_lo.attr, &md_suspend_lo.attr,
&md_suspend_hi.attr, &md_suspend_hi.attr,
&md_bitmap.attr, &md_bitmap.attr,
@ -3579,6 +3618,7 @@ static int do_md_stop(mddev_t * mddev, int mode)
mddev->size = 0; mddev->size = 0;
mddev->raid_disks = 0; mddev->raid_disks = 0;
mddev->recovery_cp = 0; mddev->recovery_cp = 0;
mddev->resync_max = MaxSector;
mddev->reshape_position = MaxSector; mddev->reshape_position = MaxSector;
mddev->external = 0; mddev->external = 0;
@ -5443,8 +5483,16 @@ void md_do_sync(mddev_t *mddev)
sector_t sectors; sector_t sectors;
skipped = 0; skipped = 0;
if (j >= mddev->resync_max) {
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
wait_event(mddev->recovery_wait,
mddev->resync_max > j
|| kthread_should_stop());
}
if (kthread_should_stop())
goto interrupted;
sectors = mddev->pers->sync_request(mddev, j, &skipped, sectors = mddev->pers->sync_request(mddev, j, &skipped,
currspeed < speed_min(mddev)); currspeed < speed_min(mddev));
if (sectors == 0) { if (sectors == 0) {
set_bit(MD_RECOVERY_ERR, &mddev->recovery); set_bit(MD_RECOVERY_ERR, &mddev->recovery);
goto out; goto out;
@ -5486,15 +5534,9 @@ void md_do_sync(mddev_t *mddev)
} }
if (kthread_should_stop()) { if (kthread_should_stop())
/* goto interrupted;
* got a signal, exit.
*/
printk(KERN_INFO
"md: md_do_sync() got signal ... exiting\n");
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
goto out;
}
/* /*
* this loop exits only if either when we are slower than * this loop exits only if either when we are slower than
@ -5558,9 +5600,22 @@ void md_do_sync(mddev_t *mddev)
skip: skip:
mddev->curr_resync = 0; mddev->curr_resync = 0;
mddev->resync_max = MaxSector;
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
wake_up(&resync_wait); wake_up(&resync_wait);
set_bit(MD_RECOVERY_DONE, &mddev->recovery); set_bit(MD_RECOVERY_DONE, &mddev->recovery);
md_wakeup_thread(mddev->thread); md_wakeup_thread(mddev->thread);
return;
interrupted:
/*
* got a signal, exit.
*/
printk(KERN_INFO
"md: md_do_sync() got signal ... exiting\n");
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
goto out;
} }
EXPORT_SYMBOL_GPL(md_do_sync); EXPORT_SYMBOL_GPL(md_do_sync);

View File

@ -1767,6 +1767,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
return rv; return rv;
} }
if (max_sector > mddev->resync_max)
max_sector = mddev->resync_max; /* Don't do IO beyond here */
nr_sectors = 0; nr_sectors = 0;
sync_blocks = 0; sync_blocks = 0;
do { do {

View File

@ -1657,6 +1657,9 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
return (max_sector - sector_nr) + sectors_skipped; return (max_sector - sector_nr) + sectors_skipped;
} }
if (max_sector > mddev->resync_max)
max_sector = mddev->resync_max; /* Don't do IO beyond here */
/* make sure whole request will fit in a chunk - if chunks /* make sure whole request will fit in a chunk - if chunks
* are meaningful * are meaningful
*/ */

View File

@ -3698,6 +3698,25 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped
release_stripe(sh); release_stripe(sh);
first_sector += STRIPE_SECTORS; first_sector += STRIPE_SECTORS;
} }
/* If this takes us to the resync_max point where we have to pause,
* then we need to write out the superblock.
*/
sector_nr += conf->chunk_size>>9;
if (sector_nr >= mddev->resync_max) {
/* Cannot proceed until we've updated the superblock... */
wait_event(conf->wait_for_overlap,
atomic_read(&conf->reshape_stripes) == 0);
mddev->reshape_position = conf->expand_progress;
set_bit(MD_CHANGE_DEVS, &mddev->flags);
md_wakeup_thread(mddev->thread);
wait_event(mddev->sb_wait,
!test_bit(MD_CHANGE_DEVS, &mddev->flags)
|| kthread_should_stop());
spin_lock_irq(&conf->device_lock);
conf->expand_lo = mddev->reshape_position;
spin_unlock_irq(&conf->device_lock);
wake_up(&conf->wait_for_overlap);
}
return conf->chunk_size>>9; return conf->chunk_size>>9;
} }
@ -3734,6 +3753,12 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
return reshape_request(mddev, sector_nr, skipped); return reshape_request(mddev, sector_nr, skipped);
/* No need to check resync_max as we never do more than one
* stripe, and as resync_max will always be on a chunk boundary,
* if the check in md_do_sync didn't fire, there is no chance
* of overstepping resync_max here
*/
/* if there is too many failed drives and we are trying /* if there is too many failed drives and we are trying
* to resync, then assert that we are finished, because there is * to resync, then assert that we are finished, because there is
* nothing we can do. * nothing we can do.

View File

@ -219,6 +219,8 @@ struct mddev_s
atomic_t recovery_active; /* blocks scheduled, but not written */ atomic_t recovery_active; /* blocks scheduled, but not written */
wait_queue_head_t recovery_wait; wait_queue_head_t recovery_wait;
sector_t recovery_cp; sector_t recovery_cp;
sector_t resync_max; /* resync should pause
* when it gets here */
spinlock_t write_lock; spinlock_t write_lock;
wait_queue_head_t sb_wait; /* for waiting on superblock updates */ wait_queue_head_t sb_wait; /* for waiting on superblock updates */