Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md
Pull MD updates from Shaohua Li:

 - A bunch of patches from Neil Brown to fix RCU usage

 - Two performance-improvement patches from Tomasz Majchrzak

 - Alexey Obitotskiy fixes a module refcount issue

 - Arnd Bergmann fixes time granularity

 - Cong Wang fixes a list corruption issue

 - Guoqing Jiang fixes a deadlock in md-cluster

 - A null pointer dereference fix from me

 - Song Liu fixes misuse of raid6 rmw

 - Other trivial/cleanup fixes from Guoqing Jiang and Xiao Ni

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/shli/md: (28 commits)
  MD: fix null pointer deference
  raid10: improve random reads performance
  md: add missing sysfs_notify on array_state update
  Fix kernel module refcount handling
  md: use seconds granularity for error logging
  md: reduce the number of synchronize_rcu() calls when multiple devices fail.
  md: be extra careful not to take a reference to a Faulty device.
  md/multipath: add rcu protection to rdev access in multipath_status.
  md/raid5: add rcu protection to rdev accesses in raid5_status.
  md/raid5: add rcu protection to rdev accesses in want_replace
  md/raid5: add rcu protection to rdev accesses in handle_failed_sync.
  md/raid1: add rcu protection to rdev in fix_read_error
  md/raid1: small code cleanup in end_sync_write
  md/raid1: small cleanup in raid1_end_read/write_request
  md/raid10: simplify print_conf a little.
  md/raid10: minor code improvement in fix_read_error()
  md/raid10: add rcu protection to rdev access during reshape.
  md/raid10: add rcu protection to rdev access in raid10_sync_request.
  md/raid10: add rcu protection in raid10_status.
  md/raid10: fix refounct imbalance when resyncing an array with a replacement device.
  ...
commit 867900b5ec
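The headline theme of this pull is Neil Brown's RCU rework: rather than paying one synchronize_rcu() per failed device, remove_and_add_spares() (in the md.c hunks below) first marks every dead device with the new RemoveSynchronized flag and then waits for a single grace period. A minimal userspace sketch of that batching idea, written against liburcu instead of the kernel RCU API — the struct, flag and function names here are illustrative, not md's:

#include <stdbool.h>
#include <stdio.h>
#include <urcu.h>		/* userspace RCU: synchronize_rcu() etc. */

#define NDEV 8

struct dev {
	bool faulty;
	bool remove_synchronized;	/* models the RemoveSynchronized bit */
};

static struct dev devs[NDEV];

static void remove_failed_devices(void)
{
	bool remove_some = false;
	int i;

	/* Pass 1: only mark candidates; no grace period yet. */
	for (i = 0; i < NDEV; i++)
		if (devs[i].faulty) {
			devs[i].remove_synchronized = true;
			remove_some = true;
		}

	/* One grace period covers every device marked above. */
	if (remove_some)
		synchronize_rcu();

	/* Pass 2: marked devices can now be torn down immediately;
	 * real readers would have held rcu_read_lock() meanwhile. */
	for (i = 0; i < NDEV; i++)
		if (devs[i].remove_synchronized) {
			devs[i].remove_synchronized = false;
			printf("device %d removed\n", i);
		}
}

int main(void)
{
	rcu_register_thread();
	devs[2].faulty = devs[5].faulty = true;
	remove_failed_devices();
	rcu_unregister_thread();
	return 0;
}

Build with -lurcu; the point is only that the grace-period cost is paid once, not once per failed device.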
diff --git a/drivers/md/md.c b/drivers/md/md.c
@@ -2482,8 +2482,7 @@ static int add_bound_rdev(struct md_rdev *rdev)
 		if (add_journal)
 			mddev_resume(mddev);
 		if (err) {
-			unbind_rdev_from_array(rdev);
-			export_rdev(rdev);
+			md_kick_rdev_from_array(rdev);
 			return err;
 		}
 	}
@@ -2600,6 +2599,10 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
 		else
 			err = -EBUSY;
 	} else if (cmd_match(buf, "remove")) {
+		if (rdev->mddev->pers) {
+			clear_bit(Blocked, &rdev->flags);
+			remove_and_add_spares(rdev->mddev, rdev);
+		}
 		if (rdev->raid_disk >= 0)
 			err = -EBUSY;
 		else {
@@ -3176,8 +3179,7 @@ int md_rdev_init(struct md_rdev *rdev)
 	rdev->data_offset = 0;
 	rdev->new_data_offset = 0;
 	rdev->sb_events = 0;
-	rdev->last_read_error.tv_sec = 0;
-	rdev->last_read_error.tv_nsec = 0;
+	rdev->last_read_error = 0;
 	rdev->sb_loaded = 0;
 	rdev->bb_page = NULL;
 	atomic_set(&rdev->nr_pending, 0);
@@ -3583,6 +3585,8 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
 		mddev->to_remove = &md_redundancy_group;
 	}

+	module_put(oldpers->owner);
+
 	rdev_for_each(rdev, mddev) {
 		if (rdev->raid_disk < 0)
 			continue;
@@ -3940,6 +3944,8 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
 		} else
 			err = -EBUSY;
 	}
+	if (!err)
+		sysfs_notify_dirent_safe(mddev->sysfs_state);
 	spin_unlock(&mddev->lock);
 	return err ?: len;
 }
@@ -4191,7 +4197,8 @@ size_store(struct mddev *mddev, const char *buf, size_t len)
 		return err;
 	if (mddev->pers) {
 		err = update_size(mddev, sectors);
-		md_update_sb(mddev, 1);
+		if (err == 0)
+			md_update_sb(mddev, 1);
 	} else {
 		if (mddev->dev_sectors == 0 ||
 		    mddev->dev_sectors > sectors)
@@ -7813,6 +7820,7 @@ void md_do_sync(struct md_thread *thread)
 		if (ret)
 			goto skip;

+		set_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags);
 		if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
 		      test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
 		      test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
@@ -8151,18 +8159,11 @@ void md_do_sync(struct md_thread *thread)
 		}
 	}
  skip:
-	if (mddev_is_clustered(mddev) &&
-	    ret == 0) {
-		/* set CHANGE_PENDING here since maybe another
-		 * update is needed, so other nodes are informed */
-		set_mask_bits(&mddev->flags, 0,
-			      BIT(MD_CHANGE_PENDING) | BIT(MD_CHANGE_DEVS));
-		md_wakeup_thread(mddev->thread);
-		wait_event(mddev->sb_wait,
-			   !test_bit(MD_CHANGE_PENDING, &mddev->flags));
-		md_cluster_ops->resync_finish(mddev);
-	} else
-		set_bit(MD_CHANGE_DEVS, &mddev->flags);
+	/* set CHANGE_PENDING here since maybe another update is needed,
+	 * so other nodes are informed. It should be harmless for normal
+	 * raid */
+	set_mask_bits(&mddev->flags, 0,
+		      BIT(MD_CHANGE_PENDING) | BIT(MD_CHANGE_DEVS));

 	spin_lock(&mddev->lock);
 	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
@@ -8188,15 +8189,34 @@ static int remove_and_add_spares(struct mddev *mddev,
 	struct md_rdev *rdev;
 	int spares = 0;
 	int removed = 0;
+	bool remove_some = false;

-	rdev_for_each(rdev, mddev)
+	rdev_for_each(rdev, mddev) {
+		if ((this == NULL || rdev == this) &&
+		    rdev->raid_disk >= 0 &&
+		    !test_bit(Blocked, &rdev->flags) &&
+		    test_bit(Faulty, &rdev->flags) &&
+		    atomic_read(&rdev->nr_pending)==0) {
+			/* Faulty non-Blocked devices with nr_pending == 0
+			 * never get nr_pending incremented,
+			 * never get Faulty cleared, and never get Blocked set.
+			 * So we can synchronize_rcu now rather than once per device
+			 */
+			remove_some = true;
+			set_bit(RemoveSynchronized, &rdev->flags);
+		}
+	}
+
+	if (remove_some)
+		synchronize_rcu();
+	rdev_for_each(rdev, mddev) {
 		if ((this == NULL || rdev == this) &&
 		    rdev->raid_disk >= 0 &&
 		    !test_bit(Blocked, &rdev->flags) &&
-		    (test_bit(Faulty, &rdev->flags) ||
+		    ((test_bit(RemoveSynchronized, &rdev->flags) ||
 		     (!test_bit(In_sync, &rdev->flags) &&
 		      !test_bit(Journal, &rdev->flags))) &&
-		    atomic_read(&rdev->nr_pending)==0) {
+		    atomic_read(&rdev->nr_pending)==0)) {
 			if (mddev->pers->hot_remove_disk(
 				    mddev, rdev) == 0) {
 				sysfs_unlink_rdev(mddev, rdev);
@@ -8204,6 +8224,10 @@ static int remove_and_add_spares(struct mddev *mddev,
 				removed++;
 			}
 		}
+		if (remove_some && test_bit(RemoveSynchronized, &rdev->flags))
+			clear_bit(RemoveSynchronized, &rdev->flags);
+	}

 	if (removed && mddev->kobj.sd)
 		sysfs_notify(&mddev->kobj, NULL, "degraded");
@@ -8506,6 +8530,11 @@ void md_reap_sync_thread(struct mddev *mddev)
 			rdev->saved_raid_disk = -1;

 	md_update_sb(mddev, 1);
+	/* MD_CHANGE_PENDING should be cleared by md_update_sb, so we can
+	 * call resync_finish here if MD_CLUSTER_RESYNC_LOCKED is set by
+	 * clustered raid */
+	if (test_and_clear_bit(MD_CLUSTER_RESYNC_LOCKED, &mddev->flags))
+		md_cluster_ops->resync_finish(mddev);
 	clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
 	clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
 	clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
@@ -8803,6 +8832,7 @@ EXPORT_SYMBOL(md_reload_sb);
  * at boot time.
  */

+static DEFINE_MUTEX(detected_devices_mutex);
 static LIST_HEAD(all_detected_devices);
 struct detected_devices_node {
 	struct list_head list;
@@ -8816,7 +8846,9 @@ void md_autodetect_dev(dev_t dev)
 	node_detected_dev = kzalloc(sizeof(*node_detected_dev), GFP_KERNEL);
 	if (node_detected_dev) {
 		node_detected_dev->dev = dev;
+		mutex_lock(&detected_devices_mutex);
 		list_add_tail(&node_detected_dev->list, &all_detected_devices);
+		mutex_unlock(&detected_devices_mutex);
 	} else {
 		printk(KERN_CRIT "md: md_autodetect_dev: kzalloc failed"
 			", skipping dev(%d,%d)\n", MAJOR(dev), MINOR(dev));
@@ -8835,6 +8867,7 @@ static void autostart_arrays(int part)

 	printk(KERN_INFO "md: Autodetecting RAID arrays.\n");

+	mutex_lock(&detected_devices_mutex);
 	while (!list_empty(&all_detected_devices) && i_scanned < INT_MAX) {
 		i_scanned++;
 		node_detected_dev = list_entry(all_detected_devices.next,
@@ -8853,6 +8886,7 @@ static void autostart_arrays(int part)
 		list_add(&rdev->same_set, &pending_raid_disks);
 		i_passed++;
 	}
+	mutex_unlock(&detected_devices_mutex);

 	printk(KERN_INFO "md: Scanned %d and added %d devices.\n",
 		i_scanned, i_passed);
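The md_autodetect_dev()/autostart_arrays() hunks above close the list-corruption race Cong Wang reported: concurrent callers were appending to the global all_detected_devices list with no lock. A userspace analogue of the fix under the same assumption — several producers feeding one global singly linked list, all names illustrative:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct node {
	int dev;
	struct node *next;
};

static struct node *all_detected;
static pthread_mutex_t detected_mutex = PTHREAD_MUTEX_INITIALIZER;

static void autodetect_dev(int dev)
{
	struct node *n = malloc(sizeof(*n));

	if (!n)
		return;
	n->dev = dev;
	pthread_mutex_lock(&detected_mutex);	/* the locking the patch adds */
	n->next = all_detected;
	all_detected = n;
	pthread_mutex_unlock(&detected_mutex);
}

static void *worker(void *arg)
{
	for (int i = 0; i < 1000; i++)
		autodetect_dev((int)(long)arg * 1000 + i);
	return NULL;
}

int main(void)
{
	pthread_t t[4];
	int count = 0;

	for (long i = 0; i < 4; i++)
		pthread_create(&t[i], NULL, worker, (void *)i);
	for (int i = 0; i < 4; i++)
		pthread_join(t[i], NULL);
	for (struct node *n = all_detected; n; n = n->next)
		count++;
	printf("%d nodes (expect 4000)\n", count);
	return 0;
}

Without the mutex the two list writes can interleave and drop nodes; with it the final count is always 4000.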
diff --git a/drivers/md/md.h b/drivers/md/md.h
@@ -99,7 +99,7 @@ struct md_rdev {
 	atomic_t	read_errors;	/* number of consecutive read errors that
 					 * we have tried to ignore.
 					 */
-	struct timespec last_read_error;	/* monotonic time since our
+	time64_t	last_read_error;	/* monotonic time since our
 						 * last read error
 						 */
 	atomic_t	corrected_errors; /* number of corrected read errors,
@@ -163,6 +163,11 @@ enum flag_bits {
 				 * than other devices in the array
 				 */
 	ClusterRemove,
+	RemoveSynchronized,	/* synchronize_rcu() was called after
+				 * this device was known to be faulty,
+				 * so it is safe to remove without
+				 * another synchronize_rcu() call.
+				 */
 };

 static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
@@ -204,6 +209,9 @@ struct mddev {
 #define MD_RELOAD_SB	7	/* Reload the superblock because another node
 				 * updated it.
 				 */
+#define MD_CLUSTER_RESYNC_LOCKED 8 /* cluster raid only, which means node
+				    * already took resync lock, need to
+				    * release the lock */

 	int				suspended;
 	atomic_t			active_io;
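The md.h hunk above replaces struct timespec with time64_t for last_read_error: the decay logic only needs whole seconds, and a single 64-bit count also avoids the year-2038 overflow. A standalone sketch of the same arithmetic that check_decay_read_errors() performs in the raid10 hunks further down, with clock_gettime(CLOCK_MONOTONIC) standing in for the kernel's ktime_get_seconds():

#include <inttypes.h>
#include <stdio.h>
#include <time.h>

static int64_t last_read_error;		/* 0 means no error seen yet */
static unsigned int read_errors = 40;	/* running error count */

static void check_decay_read_errors(void)
{
	struct timespec ts;
	int64_t now;
	unsigned long hours;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	now = ts.tv_sec;

	if (last_read_error == 0) {	/* first error: just stamp it */
		last_read_error = now;
		return;
	}

	hours = (unsigned long)(now - last_read_error) / 3600;
	last_read_error = now;

	/* halve the count once per elapsed hour, as the md code does */
	if (hours >= sizeof(read_errors) * 8)
		read_errors = 0;
	else
		read_errors >>= hours;
}

int main(void)
{
	check_decay_read_errors();
	printf("read_errors=%u last=%" PRId64 "\n", read_errors, last_read_error);
	return 0;
}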
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
@@ -43,7 +43,8 @@ static int multipath_map (struct mpconf *conf)
 	rcu_read_lock();
 	for (i = 0; i < disks; i++) {
 		struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev);
-		if (rdev && test_bit(In_sync, &rdev->flags)) {
+		if (rdev && test_bit(In_sync, &rdev->flags) &&
+		    !test_bit(Faulty, &rdev->flags)) {
 			atomic_inc(&rdev->nr_pending);
 			rcu_read_unlock();
 			return i;
@@ -141,17 +142,19 @@ static void multipath_make_request(struct mddev *mddev, struct bio * bio)
 	return;
 }

-static void multipath_status (struct seq_file *seq, struct mddev *mddev)
+static void multipath_status(struct seq_file *seq, struct mddev *mddev)
 {
 	struct mpconf *conf = mddev->private;
 	int i;

 	seq_printf (seq, " [%d/%d] [", conf->raid_disks,
 		    conf->raid_disks - mddev->degraded);
-	for (i = 0; i < conf->raid_disks; i++)
-		seq_printf (seq, "%s",
-			   conf->multipaths[i].rdev &&
-			   test_bit(In_sync, &conf->multipaths[i].rdev->flags) ? "U" : "_");
+	rcu_read_lock();
+	for (i = 0; i < conf->raid_disks; i++) {
+		struct md_rdev *rdev = rcu_dereference(conf->multipaths[i].rdev);
+		seq_printf (seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
+	}
+	rcu_read_unlock();
 	seq_printf (seq, "]");
 }

@@ -295,12 +298,14 @@ static int multipath_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
 			goto abort;
 		}
 		p->rdev = NULL;
-		synchronize_rcu();
-		if (atomic_read(&rdev->nr_pending)) {
-			/* lost the race, try later */
-			err = -EBUSY;
-			p->rdev = rdev;
-			goto abort;
+		if (!test_bit(RemoveSynchronized, &rdev->flags)) {
+			synchronize_rcu();
+			if (atomic_read(&rdev->nr_pending)) {
+				/* lost the race, try later */
+				err = -EBUSY;
+				p->rdev = rdev;
+				goto abort;
+			}
 		}
 		err = md_integrity_register(mddev);
 	}
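The multipath_status() rewrite above is the same pattern applied to raid10_status() and raid5_status() below: take rcu_read_lock() around the walk and read each rdev pointer exactly once through rcu_dereference(), instead of dereferencing conf->multipaths[i].rdev twice and racing with removal. A liburcu userspace sketch of that discipline (an assumed analogue, with illustrative names):

#include <stdio.h>
#include <urcu.h>

#define NDISK 4

struct path {
	int in_sync;
};

static struct path *paths[NDISK];	/* published with rcu_assign_pointer */

static void print_status(void)
{
	int i;

	rcu_read_lock();
	for (i = 0; i < NDISK; i++) {
		/* one read per slot: the pointer may be NULLed by a
		 * concurrent remove, but cannot be freed under us */
		struct path *p = rcu_dereference(paths[i]);

		putchar(p && p->in_sync ? 'U' : '_');
	}
	rcu_read_unlock();
	putchar('\n');
}

int main(void)
{
	static struct path up = { .in_sync = 1 };

	rcu_register_thread();
	rcu_assign_pointer(paths[0], &up);
	rcu_assign_pointer(paths[2], &up);
	print_status();		/* prints U_U_ */
	rcu_unregister_thread();
	return 0;
}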
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
@@ -319,14 +319,13 @@ static void raid1_end_read_request(struct bio *bio)
 {
 	int uptodate = !bio->bi_error;
 	struct r1bio *r1_bio = bio->bi_private;
-	int mirror;
 	struct r1conf *conf = r1_bio->mddev->private;
+	struct md_rdev *rdev = conf->mirrors[r1_bio->read_disk].rdev;

-	mirror = r1_bio->read_disk;
 	/*
 	 * this branch is our 'one mirror IO has finished' event handler:
 	 */
-	update_head_pos(mirror, r1_bio);
+	update_head_pos(r1_bio->read_disk, r1_bio);

 	if (uptodate)
 		set_bit(R1BIO_Uptodate, &r1_bio->state);
@@ -339,14 +338,14 @@ static void raid1_end_read_request(struct bio *bio)
 		spin_lock_irqsave(&conf->device_lock, flags);
 		if (r1_bio->mddev->degraded == conf->raid_disks ||
 		    (r1_bio->mddev->degraded == conf->raid_disks-1 &&
-		     test_bit(In_sync, &conf->mirrors[mirror].rdev->flags)))
+		     test_bit(In_sync, &rdev->flags)))
 			uptodate = 1;
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 	}

 	if (uptodate) {
 		raid_end_bio_io(r1_bio);
-		rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
+		rdev_dec_pending(rdev, conf->mddev);
 	} else {
 		/*
 		 * oops, read error:
@@ -356,7 +355,7 @@ static void raid1_end_read_request(struct bio *bio)
 		       KERN_ERR "md/raid1:%s: %s: "
 		       "rescheduling sector %llu\n",
 		       mdname(conf->mddev),
-		       bdevname(conf->mirrors[mirror].rdev->bdev,
+		       bdevname(rdev->bdev,
 				b),
 		       (unsigned long long)r1_bio->sector);
 		set_bit(R1BIO_ReadError, &r1_bio->state);
@@ -403,20 +402,18 @@ static void r1_bio_write_done(struct r1bio *r1_bio)
 static void raid1_end_write_request(struct bio *bio)
 {
 	struct r1bio *r1_bio = bio->bi_private;
-	int mirror, behind = test_bit(R1BIO_BehindIO, &r1_bio->state);
+	int behind = test_bit(R1BIO_BehindIO, &r1_bio->state);
 	struct r1conf *conf = r1_bio->mddev->private;
 	struct bio *to_put = NULL;
-
-	mirror = find_bio_disk(r1_bio, bio);
+	int mirror = find_bio_disk(r1_bio, bio);
+	struct md_rdev *rdev = conf->mirrors[mirror].rdev;

 	/*
 	 * 'one mirror IO has finished' event handler:
 	 */
 	if (bio->bi_error) {
-		set_bit(WriteErrorSeen,
-			&conf->mirrors[mirror].rdev->flags);
-		if (!test_and_set_bit(WantReplacement,
-				      &conf->mirrors[mirror].rdev->flags))
+		set_bit(WriteErrorSeen,	&rdev->flags);
+		if (!test_and_set_bit(WantReplacement, &rdev->flags))
 			set_bit(MD_RECOVERY_NEEDED, &
 				conf->mddev->recovery);
@@ -445,13 +442,12 @@ static void raid1_end_write_request(struct bio *bio)
 		 * before rdev->recovery_offset, but for simplicity we don't
 		 * check this here.
 		 */
-		if (test_bit(In_sync, &conf->mirrors[mirror].rdev->flags) &&
-		    !test_bit(Faulty, &conf->mirrors[mirror].rdev->flags))
+		if (test_bit(In_sync, &rdev->flags) &&
+		    !test_bit(Faulty, &rdev->flags))
 			set_bit(R1BIO_Uptodate, &r1_bio->state);

 		/* Maybe we can clear some bad blocks. */
-		if (is_badblock(conf->mirrors[mirror].rdev,
-				r1_bio->sector, r1_bio->sectors,
+		if (is_badblock(rdev, r1_bio->sector, r1_bio->sectors,
 				&first_bad, &bad_sectors)) {
 			r1_bio->bios[mirror] = IO_MADE_GOOD;
 			set_bit(R1BIO_MadeGood, &r1_bio->state);
@@ -459,7 +455,7 @@ static void raid1_end_write_request(struct bio *bio)
 	}

 	if (behind) {
-		if (test_bit(WriteMostly, &conf->mirrors[mirror].rdev->flags))
+		if (test_bit(WriteMostly, &rdev->flags))
 			atomic_dec(&r1_bio->behind_remaining);

 		/*
@@ -483,8 +479,7 @@ static void raid1_end_write_request(struct bio *bio)
 		}
 	}
 	if (r1_bio->bios[mirror] == NULL)
-		rdev_dec_pending(conf->mirrors[mirror].rdev,
-				 conf->mddev);
+		rdev_dec_pending(rdev, conf->mddev);

 	/*
 	 * Let's see if all mirrored write operations have finished
@@ -689,13 +684,6 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
 	if (!rdev)
 		goto retry;
 	atomic_inc(&rdev->nr_pending);
-	if (test_bit(Faulty, &rdev->flags)) {
-		/* cannot risk returning a device that failed
-		 * before we inc'ed nr_pending
-		 */
-		rdev_dec_pending(rdev, conf->mddev);
-		goto retry;
-	}
 	sectors = best_good_sectors;

 	if (conf->mirrors[best_disk].next_seq_sect != this_sector)
@@ -1666,13 +1654,16 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
 			goto abort;
 		}
 		p->rdev = NULL;
-		synchronize_rcu();
-		if (atomic_read(&rdev->nr_pending)) {
-			/* lost the race, try later */
-			err = -EBUSY;
-			p->rdev = rdev;
-			goto abort;
-		} else if (conf->mirrors[conf->raid_disks + number].rdev) {
+		if (!test_bit(RemoveSynchronized, &rdev->flags)) {
+			synchronize_rcu();
+			if (atomic_read(&rdev->nr_pending)) {
+				/* lost the race, try later */
+				err = -EBUSY;
+				p->rdev = rdev;
+				goto abort;
+			}
+		}
+		if (conf->mirrors[conf->raid_disks + number].rdev) {
 			/* We just removed a device that is being replaced.
 			 * Move down the replacement. We drain all IO before
 			 * doing this to avoid confusion.
@@ -1719,11 +1710,9 @@ static void end_sync_write(struct bio *bio)
 	struct r1bio *r1_bio = bio->bi_private;
 	struct mddev *mddev = r1_bio->mddev;
 	struct r1conf *conf = mddev->private;
-	int mirror=0;
 	sector_t first_bad;
 	int bad_sectors;
-
-	mirror = find_bio_disk(r1_bio, bio);
+	struct md_rdev *rdev = conf->mirrors[find_bio_disk(r1_bio, bio)].rdev;

 	if (!uptodate) {
 		sector_t sync_blocks = 0;
@@ -1736,16 +1725,12 @@ static void end_sync_write(struct bio *bio)
 			s += sync_blocks;
 			sectors_to_go -= sync_blocks;
 		} while (sectors_to_go > 0);
-		set_bit(WriteErrorSeen,
-			&conf->mirrors[mirror].rdev->flags);
-		if (!test_and_set_bit(WantReplacement,
-				      &conf->mirrors[mirror].rdev->flags))
+		set_bit(WriteErrorSeen, &rdev->flags);
+		if (!test_and_set_bit(WantReplacement, &rdev->flags))
 			set_bit(MD_RECOVERY_NEEDED, &
 				mddev->recovery);
 		set_bit(R1BIO_WriteError, &r1_bio->state);
-	} else if (is_badblock(conf->mirrors[mirror].rdev,
-			       r1_bio->sector,
-			       r1_bio->sectors,
+	} else if (is_badblock(rdev, r1_bio->sector, r1_bio->sectors,
 			       &first_bad, &bad_sectors) &&
 		   !is_badblock(conf->mirrors[r1_bio->read_disk].rdev,
 				r1_bio->sector,
@@ -2072,29 +2057,30 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
 		s = PAGE_SIZE >> 9;

 		do {
-			/* Note: no rcu protection needed here
-			 * as this is synchronous in the raid1d thread
-			 * which is the thread that might remove
-			 * a device. If raid1d ever becomes multi-threaded....
-			 */
 			sector_t first_bad;
 			int bad_sectors;

-			rdev = conf->mirrors[d].rdev;
+			rcu_read_lock();
+			rdev = rcu_dereference(conf->mirrors[d].rdev);
 			if (rdev &&
 			    (test_bit(In_sync, &rdev->flags) ||
 			     (!test_bit(Faulty, &rdev->flags) &&
 			      rdev->recovery_offset >= sect + s)) &&
 			    is_badblock(rdev, sect, s,
-					&first_bad, &bad_sectors) == 0 &&
-			    sync_page_io(rdev, sect, s<<9,
-					 conf->tmppage, REQ_OP_READ, 0, false))
-				success = 1;
-			else {
-				d++;
-				if (d == conf->raid_disks * 2)
-					d = 0;
-			}
+					&first_bad, &bad_sectors) == 0) {
+				atomic_inc(&rdev->nr_pending);
+				rcu_read_unlock();
+				if (sync_page_io(rdev, sect, s<<9,
+						 conf->tmppage, REQ_OP_READ, 0, false))
+					success = 1;
+				rdev_dec_pending(rdev, mddev);
+				if (success)
+					break;
+			} else
+				rcu_read_unlock();
+			d++;
+			if (d == conf->raid_disks * 2)
+				d = 0;
 		} while (!success && d != read_disk);

 		if (!success) {
@@ -2110,11 +2096,17 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
 			if (d==0)
 				d = conf->raid_disks * 2;
 			d--;
-			rdev = conf->mirrors[d].rdev;
+			rcu_read_lock();
+			rdev = rcu_dereference(conf->mirrors[d].rdev);
 			if (rdev &&
-			    !test_bit(Faulty, &rdev->flags))
+			    !test_bit(Faulty, &rdev->flags)) {
+				atomic_inc(&rdev->nr_pending);
+				rcu_read_unlock();
 				r1_sync_page_io(rdev, sect, s,
 						conf->tmppage, WRITE);
+				rdev_dec_pending(rdev, mddev);
+			} else
+				rcu_read_unlock();
 		}
 		d = start;
 		while (d != read_disk) {
@@ -2122,9 +2114,12 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
 			if (d==0)
 				d = conf->raid_disks * 2;
 			d--;
-			rdev = conf->mirrors[d].rdev;
+			rcu_read_lock();
+			rdev = rcu_dereference(conf->mirrors[d].rdev);
 			if (rdev &&
 			    !test_bit(Faulty, &rdev->flags)) {
+				atomic_inc(&rdev->nr_pending);
+				rcu_read_unlock();
 				if (r1_sync_page_io(rdev, sect, s,
 						    conf->tmppage, READ)) {
 					atomic_add(s, &rdev->corrected_errors);
@@ -2133,10 +2128,12 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
 					       "(%d sectors at %llu on %s)\n",
 					       mdname(mddev), s,
 					       (unsigned long long)(sect +
-						    rdev->data_offset),
+							    rdev->data_offset),
 					       bdevname(rdev->bdev, b));
 				}
-			}
+				rdev_dec_pending(rdev, mddev);
+			} else
+				rcu_read_unlock();
 		}
 		sectors -= s;
 		sect += s;
@@ -2534,6 +2531,13 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
 		return sync_blocks;
 	}

+	/*
+	 * If there is non-resync activity waiting for a turn, then let it
+	 * though before starting on this new sync request.
+	 */
+	if (conf->nr_waiting)
+		schedule_timeout_uninterruptible(1);
+
 	/* we are incrementing sector_nr below. To be safe, we check against
 	 * sector_nr + two times RESYNC_SECTORS
 	 */
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
@@ -707,7 +707,6 @@ static struct md_rdev *read_balance(struct r10conf *conf,

 	raid10_find_phys(conf, r10_bio);
 	rcu_read_lock();
-retry:
 	sectors = r10_bio->sectors;
 	best_slot = -1;
 	best_rdev = NULL;
@@ -804,13 +803,6 @@ retry:

 	if (slot >= 0) {
 		atomic_inc(&rdev->nr_pending);
-		if (test_bit(Faulty, &rdev->flags)) {
-			/* Cannot risk returning a device that failed
-			 * before we inc'ed nr_pending
-			 */
-			rdev_dec_pending(rdev, conf->mddev);
-			goto retry;
-		}
 		r10_bio->read_slot = slot;
 	} else
 		rdev = NULL;
@@ -913,7 +905,7 @@ static void raise_barrier(struct r10conf *conf, int force)

 	/* Now wait for all pending IO to complete */
 	wait_event_lock_irq(conf->wait_barrier,
-			    !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
+			    !atomic_read(&conf->nr_pending) && conf->barrier < RESYNC_DEPTH,
 			    conf->resync_lock);

 	spin_unlock_irq(&conf->resync_lock);
@@ -944,23 +936,23 @@ static void wait_barrier(struct r10conf *conf)
 		 */
 		wait_event_lock_irq(conf->wait_barrier,
 				    !conf->barrier ||
-				    (conf->nr_pending &&
+				    (atomic_read(&conf->nr_pending) &&
 				     current->bio_list &&
 				     !bio_list_empty(current->bio_list)),
 				    conf->resync_lock);
 		conf->nr_waiting--;
+		if (!conf->nr_waiting)
+			wake_up(&conf->wait_barrier);
 	}
-	conf->nr_pending++;
+	atomic_inc(&conf->nr_pending);
 	spin_unlock_irq(&conf->resync_lock);
 }

 static void allow_barrier(struct r10conf *conf)
 {
-	unsigned long flags;
-	spin_lock_irqsave(&conf->resync_lock, flags);
-	conf->nr_pending--;
-	spin_unlock_irqrestore(&conf->resync_lock, flags);
-	wake_up(&conf->wait_barrier);
+	if ((atomic_dec_and_test(&conf->nr_pending)) ||
+			(conf->array_freeze_pending))
+		wake_up(&conf->wait_barrier);
 }

 static void freeze_array(struct r10conf *conf, int extra)
@@ -978,13 +970,15 @@ static void freeze_array(struct r10conf *conf, int extra)
 	 * we continue.
 	 */
 	spin_lock_irq(&conf->resync_lock);
+	conf->array_freeze_pending++;
 	conf->barrier++;
 	conf->nr_waiting++;
 	wait_event_lock_irq_cmd(conf->wait_barrier,
-				conf->nr_pending == conf->nr_queued+extra,
+				atomic_read(&conf->nr_pending) == conf->nr_queued+extra,
 				conf->resync_lock,
 				flush_pending_writes(conf));
-
+	conf->array_freeze_pending--;
 	spin_unlock_irq(&conf->resync_lock);
 }
@@ -1499,10 +1493,12 @@ static void raid10_status(struct seq_file *seq, struct mddev *mddev)
 	}
 	seq_printf(seq, " [%d/%d] [", conf->geo.raid_disks,
 					conf->geo.raid_disks - mddev->degraded);
-	for (i = 0; i < conf->geo.raid_disks; i++)
-		seq_printf(seq, "%s",
-			      conf->mirrors[i].rdev &&
-			      test_bit(In_sync, &conf->mirrors[i].rdev->flags) ? "U" : "_");
+	rcu_read_lock();
+	for (i = 0; i < conf->geo.raid_disks; i++) {
+		struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
+		seq_printf(seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
+	}
+	rcu_read_unlock();
 	seq_printf(seq, "]");
 }
@@ -1600,7 +1596,7 @@ static void raid10_error(struct mddev *mddev, struct md_rdev *rdev)
 static void print_conf(struct r10conf *conf)
 {
 	int i;
-	struct raid10_info *tmp;
+	struct md_rdev *rdev;

 	printk(KERN_DEBUG "RAID10 conf printout:\n");
 	if (!conf) {
@@ -1610,14 +1606,16 @@ static void print_conf(struct r10conf *conf)
 	printk(KERN_DEBUG " --- wd:%d rd:%d\n", conf->geo.raid_disks - conf->mddev->degraded,
 		conf->geo.raid_disks);

+	/* This is only called with ->reconfix_mutex held, so
+	 * rcu protection of rdev is not needed */
 	for (i = 0; i < conf->geo.raid_disks; i++) {
 		char b[BDEVNAME_SIZE];
-		tmp = conf->mirrors + i;
-		if (tmp->rdev)
+		rdev = conf->mirrors[i].rdev;
+		if (rdev)
 			printk(KERN_DEBUG " disk %d, wo:%d, o:%d, dev:%s\n",
-				i, !test_bit(In_sync, &tmp->rdev->flags),
-				!test_bit(Faulty, &tmp->rdev->flags),
-				bdevname(tmp->rdev->bdev,b));
+				i, !test_bit(In_sync, &rdev->flags),
+				!test_bit(Faulty, &rdev->flags),
+				bdevname(rdev->bdev,b));
 	}
 }
@@ -1766,7 +1764,7 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
 		err = -EBUSY;
 		goto abort;
 	}
-	/* Only remove faulty devices if recovery
+	/* Only remove non-faulty devices if recovery
 	 * is not possible.
 	 */
 	if (!test_bit(Faulty, &rdev->flags) &&
@@ -1778,13 +1776,16 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
 		goto abort;
 	}
 	*rdevp = NULL;
-	synchronize_rcu();
-	if (atomic_read(&rdev->nr_pending)) {
-		/* lost the race, try later */
-		err = -EBUSY;
-		*rdevp = rdev;
-		goto abort;
-	} else if (p->replacement) {
+	if (!test_bit(RemoveSynchronized, &rdev->flags)) {
+		synchronize_rcu();
+		if (atomic_read(&rdev->nr_pending)) {
+			/* lost the race, try later */
+			err = -EBUSY;
+			*rdevp = rdev;
+			goto abort;
+		}
+	}
+	if (p->replacement) {
 		/* We must have just cleared 'rdev' */
 		p->rdev = p->replacement;
 		clear_bit(Replacement, &p->replacement->flags);
@@ -2171,21 +2172,20 @@ static void recovery_request_write(struct mddev *mddev, struct r10bio *r10_bio)
  */
 static void check_decay_read_errors(struct mddev *mddev, struct md_rdev *rdev)
 {
-	struct timespec cur_time_mon;
+	long cur_time_mon;
 	unsigned long hours_since_last;
 	unsigned int read_errors = atomic_read(&rdev->read_errors);

-	ktime_get_ts(&cur_time_mon);
+	cur_time_mon = ktime_get_seconds();

-	if (rdev->last_read_error.tv_sec == 0 &&
-	    rdev->last_read_error.tv_nsec == 0) {
+	if (rdev->last_read_error == 0) {
 		/* first time we've seen a read error */
 		rdev->last_read_error = cur_time_mon;
 		return;
 	}

-	hours_since_last = (cur_time_mon.tv_sec -
-			    rdev->last_read_error.tv_sec) / 3600;
+	hours_since_last = (long)(cur_time_mon -
+			    rdev->last_read_error) / 3600;

 	rdev->last_read_error = cur_time_mon;
@@ -2264,7 +2264,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 			printk(KERN_NOTICE
 			       "md/raid10:%s: %s: Failing raid device\n",
 			       mdname(mddev), b);
-			md_error(mddev, conf->mirrors[d].rdev);
+			md_error(mddev, rdev);
 			r10_bio->devs[r10_bio->read_slot].bio = IO_BLOCKED;
 			return;
 		}
@@ -2287,6 +2287,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 			rdev = rcu_dereference(conf->mirrors[d].rdev);
 			if (rdev &&
 			    test_bit(In_sync, &rdev->flags) &&
+			    !test_bit(Faulty, &rdev->flags) &&
 			    is_badblock(rdev, r10_bio->devs[sl].addr + sect, s,
 					&first_bad, &bad_sectors) == 0) {
 				atomic_inc(&rdev->nr_pending);
@@ -2340,6 +2341,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 			d = r10_bio->devs[sl].devnum;
 			rdev = rcu_dereference(conf->mirrors[d].rdev);
 			if (!rdev ||
+			    test_bit(Faulty, &rdev->flags) ||
 			    !test_bit(In_sync, &rdev->flags))
 				continue;

@@ -2379,6 +2381,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
 			d = r10_bio->devs[sl].devnum;
 			rdev = rcu_dereference(conf->mirrors[d].rdev);
 			if (!rdev ||
+			    test_bit(Faulty, &rdev->flags) ||
 			    !test_bit(In_sync, &rdev->flags))
 				continue;

@@ -2876,11 +2879,14 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 			/* Completed a full sync so the replacements
 			 * are now fully recovered.
 			 */
-			for (i = 0; i < conf->geo.raid_disks; i++)
-				if (conf->mirrors[i].replacement)
-					conf->mirrors[i].replacement
-						->recovery_offset
-						= MaxSector;
+			rcu_read_lock();
+			for (i = 0; i < conf->geo.raid_disks; i++) {
+				struct md_rdev *rdev =
+					rcu_dereference(conf->mirrors[i].replacement);
+				if (rdev)
+					rdev->recovery_offset = MaxSector;
+			}
+			rcu_read_unlock();
 		}
 		conf->fullsync = 0;
 	}
@@ -2911,6 +2917,13 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 	    max_sector > (sector_nr | chunk_mask))
 		max_sector = (sector_nr | chunk_mask) + 1;

+	/*
+	 * If there is non-resync activity waiting for a turn, then let it
+	 * though before starting on this new sync request.
+	 */
+	if (conf->nr_waiting)
+		schedule_timeout_uninterruptible(1);
+
 	/* Again, very different code for resync and recovery.
 	 * Both must result in an r10bio with a list of bios that
 	 * have bi_end_io, bi_sector, bi_bdev set,
@@ -2939,14 +2952,20 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 			int must_sync;
 			int any_working;
 			struct raid10_info *mirror = &conf->mirrors[i];
+			struct md_rdev *mrdev, *mreplace;

-			if ((mirror->rdev == NULL ||
-			     test_bit(In_sync, &mirror->rdev->flags))
-			    &&
-			    (mirror->replacement == NULL ||
-			     test_bit(Faulty,
-				      &mirror->replacement->flags)))
+			rcu_read_lock();
+			mrdev = rcu_dereference(mirror->rdev);
+			mreplace = rcu_dereference(mirror->replacement);
+
+			if ((mrdev == NULL ||
+			     test_bit(Faulty, &mrdev->flags) ||
+			     test_bit(In_sync, &mrdev->flags)) &&
+			    (mreplace == NULL ||
+			     test_bit(Faulty, &mreplace->flags))) {
+				rcu_read_unlock();
 				continue;
+			}

 			still_degraded = 0;
 			/* want to reconstruct this device */
@@ -2956,8 +2975,11 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 				/* last stripe is not complete - don't
 				 * try to recover this sector.
 				 */
+				rcu_read_unlock();
 				continue;
 			}
+			if (mreplace && test_bit(Faulty, &mreplace->flags))
+				mreplace = NULL;
 			/* Unless we are doing a full sync, or a replacement
 			 * we only need to recover the block if it is set in
 			 * the bitmap
@@ -2967,14 +2989,19 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 			if (sync_blocks < max_sync)
 				max_sync = sync_blocks;
 			if (!must_sync &&
-			    mirror->replacement == NULL &&
+			    mreplace == NULL &&
 			    !conf->fullsync) {
 				/* yep, skip the sync_blocks here, but don't assume
 				 * that there will never be anything to do here
 				 */
 				chunks_skipped = -1;
+				rcu_read_unlock();
 				continue;
 			}
+			atomic_inc(&mrdev->nr_pending);
+			if (mreplace)
+				atomic_inc(&mreplace->nr_pending);
+			rcu_read_unlock();

 			r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
 			r10_bio->state = 0;
@@ -2993,12 +3020,15 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 			/* Need to check if the array will still be
 			 * degraded
 			 */
-			for (j = 0; j < conf->geo.raid_disks; j++)
-				if (conf->mirrors[j].rdev == NULL ||
-				    test_bit(Faulty, &conf->mirrors[j].rdev->flags)) {
+			rcu_read_lock();
+			for (j = 0; j < conf->geo.raid_disks; j++) {
+				struct md_rdev *rdev = rcu_dereference(
+					conf->mirrors[j].rdev);
+				if (rdev == NULL || test_bit(Faulty, &rdev->flags)) {
 					still_degraded = 1;
 					break;
 				}
+			}

 			must_sync = bitmap_start_sync(mddev->bitmap, sect,
 						      &sync_blocks, still_degraded);
@@ -3008,15 +3038,15 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 				int k;
 				int d = r10_bio->devs[j].devnum;
 				sector_t from_addr, to_addr;
-				struct md_rdev *rdev;
+				struct md_rdev *rdev =
+					rcu_dereference(conf->mirrors[d].rdev);
 				sector_t sector, first_bad;
 				int bad_sectors;
-				if (!conf->mirrors[d].rdev ||
-				    !test_bit(In_sync, &conf->mirrors[d].rdev->flags))
+				if (!rdev ||
+				    !test_bit(In_sync, &rdev->flags))
 					continue;
 				/* This is where we read from */
 				any_working = 1;
-				rdev = conf->mirrors[d].rdev;
 				sector = r10_bio->devs[j].addr;

 				if (is_badblock(rdev, sector, max_sync,
@@ -3055,8 +3085,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 				r10_bio->devs[1].devnum = i;
 				r10_bio->devs[1].addr = to_addr;

-				rdev = mirror->rdev;
-				if (!test_bit(In_sync, &rdev->flags)) {
+				if (!test_bit(In_sync, &mrdev->flags)) {
 					bio = r10_bio->devs[1].bio;
 					bio_reset(bio);
 					bio->bi_next = biolist;
@@ -3065,8 +3094,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 					bio->bi_end_io = end_sync_write;
 					bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 					bio->bi_iter.bi_sector = to_addr
-						+ rdev->data_offset;
-					bio->bi_bdev = rdev->bdev;
+						+ mrdev->data_offset;
+					bio->bi_bdev = mrdev->bdev;
 					atomic_inc(&r10_bio->remaining);
 				} else
 					r10_bio->devs[1].bio->bi_end_io = NULL;
@@ -3075,8 +3104,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 				bio = r10_bio->devs[1].repl_bio;
 				if (bio)
 					bio->bi_end_io = NULL;
-				rdev = mirror->replacement;
-				/* Note: if rdev != NULL, then bio
+				/* Note: if mreplace != NULL, then bio
 				 * cannot be NULL as r10buf_pool_alloc will
 				 * have allocated it.
 				 * So the second test here is pointless.
@@ -3084,8 +3112,8 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 				 * this comment keeps human reviewers
 				 * happy.
 				 */
-				if (rdev == NULL || bio == NULL ||
-				    test_bit(Faulty, &rdev->flags))
+				if (mreplace == NULL || bio == NULL ||
+				    test_bit(Faulty, &mreplace->flags))
 					break;
 				bio_reset(bio);
 				bio->bi_next = biolist;
@@ -3094,11 +3122,12 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 				bio->bi_end_io = end_sync_write;
 				bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
 				bio->bi_iter.bi_sector = to_addr +
-					rdev->data_offset;
-				bio->bi_bdev = rdev->bdev;
+					mreplace->data_offset;
+				bio->bi_bdev = mreplace->bdev;
 				atomic_inc(&r10_bio->remaining);
 				break;
 			}
+			rcu_read_unlock();
 			if (j == conf->copies) {
 				/* Cannot recover, so abort the recovery or
 				 * record a bad block */
@@ -3111,15 +3140,15 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 					if (r10_bio->devs[k].devnum == i)
 						break;
 					if (!test_bit(In_sync,
-						      &mirror->rdev->flags)
+						      &mrdev->flags)
 					    && !rdev_set_badblocks(
-						    mirror->rdev,
+						    mrdev,
 						    r10_bio->devs[k].addr,
 						    max_sync, 0))
 						any_working = 0;
-				if (mirror->replacement &&
+				if (mreplace &&
 				    !rdev_set_badblocks(
-					    mirror->replacement,
+					    mreplace,
 					    r10_bio->devs[k].addr,
 					    max_sync, 0))
 						any_working = 0;
@@ -3137,8 +3166,14 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 				if (rb2)
 					atomic_dec(&rb2->remaining);
 				r10_bio = rb2;
+				rdev_dec_pending(mrdev, mddev);
+				if (mreplace)
+					rdev_dec_pending(mreplace, mddev);
 				break;
 			}
+			rdev_dec_pending(mrdev, mddev);
+			if (mreplace)
+				rdev_dec_pending(mreplace, mddev);
 		}
 		if (biolist == NULL) {
 			while (r10_bio) {
@@ -3183,6 +3218,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 			int d = r10_bio->devs[i].devnum;
 			sector_t first_bad, sector;
 			int bad_sectors;
+			struct md_rdev *rdev;

 			if (r10_bio->devs[i].repl_bio)
 				r10_bio->devs[i].repl_bio->bi_end_io = NULL;
@@ -3190,12 +3226,14 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 			bio = r10_bio->devs[i].bio;
 			bio_reset(bio);
 			bio->bi_error = -EIO;
-			if (conf->mirrors[d].rdev == NULL ||
-			    test_bit(Faulty, &conf->mirrors[d].rdev->flags))
+			rcu_read_lock();
+			rdev = rcu_dereference(conf->mirrors[d].rdev);
+			if (rdev == NULL || test_bit(Faulty, &rdev->flags)) {
+				rcu_read_unlock();
 				continue;
+			}
 			sector = r10_bio->devs[i].addr;
-			if (is_badblock(conf->mirrors[d].rdev,
-					sector, max_sync,
+			if (is_badblock(rdev, sector, max_sync,
 					&first_bad, &bad_sectors)) {
 				if (first_bad > sector)
 					max_sync = first_bad - sector;
@@ -3203,25 +3241,28 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 					bad_sectors -= (sector - first_bad);
 				if (max_sync > bad_sectors)
 					max_sync = bad_sectors;
+				rcu_read_unlock();
 				continue;
 			}
-			atomic_inc(&conf->mirrors[d].rdev->nr_pending);
+			atomic_inc(&rdev->nr_pending);
 			atomic_inc(&r10_bio->remaining);
 			bio->bi_next = biolist;
 			biolist = bio;
 			bio->bi_private = r10_bio;
 			bio->bi_end_io = end_sync_read;
 			bio_set_op_attrs(bio, REQ_OP_READ, 0);
-			bio->bi_iter.bi_sector = sector +
-				conf->mirrors[d].rdev->data_offset;
-			bio->bi_bdev = conf->mirrors[d].rdev->bdev;
+			bio->bi_iter.bi_sector = sector + rdev->data_offset;
+			bio->bi_bdev = rdev->bdev;
 			count++;

-			if (conf->mirrors[d].replacement == NULL ||
-			    test_bit(Faulty,
-				     &conf->mirrors[d].replacement->flags))
+			rdev = rcu_dereference(conf->mirrors[d].replacement);
+			if (rdev == NULL || test_bit(Faulty, &rdev->flags)) {
+				rcu_read_unlock();
 				continue;
+			}
+			atomic_inc(&rdev->nr_pending);
+			rcu_read_unlock();

 			/* Need to set up for writing to the replacement */
 			bio = r10_bio->devs[i].repl_bio;
@@ -3229,15 +3270,13 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
 			bio->bi_error = -EIO;

 			sector = r10_bio->devs[i].addr;
-			atomic_inc(&conf->mirrors[d].rdev->nr_pending);
 			bio->bi_next = biolist;
 			biolist = bio;
 			bio->bi_private = r10_bio;
 			bio->bi_end_io = end_sync_write;
 			bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
-			bio->bi_iter.bi_sector = sector +
-				conf->mirrors[d].replacement->data_offset;
-			bio->bi_bdev = conf->mirrors[d].replacement->bdev;
+			bio->bi_iter.bi_sector = sector + rdev->data_offset;
+			bio->bi_bdev = rdev->bdev;
 			count++;
 		}
@@ -3504,6 +3543,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)

 	spin_lock_init(&conf->resync_lock);
 	init_waitqueue_head(&conf->wait_barrier);
+	atomic_set(&conf->nr_pending, 0);

 	conf->thread = md_register_thread(raid10d, mddev, "raid10");
 	if (!conf->thread)
@@ -4333,15 +4373,16 @@ read_more:
 	blist = read_bio;
 	read_bio->bi_next = NULL;

+	rcu_read_lock();
 	for (s = 0; s < conf->copies*2; s++) {
 		struct bio *b;
 		int d = r10_bio->devs[s/2].devnum;
 		struct md_rdev *rdev2;
 		if (s&1) {
-			rdev2 = conf->mirrors[d].replacement;
+			rdev2 = rcu_dereference(conf->mirrors[d].replacement);
 			b = r10_bio->devs[s/2].repl_bio;
 		} else {
-			rdev2 = conf->mirrors[d].rdev;
+			rdev2 = rcu_dereference(conf->mirrors[d].rdev);
 			b = r10_bio->devs[s/2].bio;
 		}
 		if (!rdev2 || test_bit(Faulty, &rdev2->flags))
@@ -4386,6 +4427,7 @@ read_more:
 		nr_sectors += len >> 9;
 	}
 bio_full:
+	rcu_read_unlock();
 	r10_bio->sectors = nr_sectors;

 	/* Now submit the read */
@@ -4437,16 +4479,20 @@ static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio)
 		struct bio *b;
 		int d = r10_bio->devs[s/2].devnum;
 		struct md_rdev *rdev;
+		rcu_read_lock();
 		if (s&1) {
-			rdev = conf->mirrors[d].replacement;
+			rdev = rcu_dereference(conf->mirrors[d].replacement);
 			b = r10_bio->devs[s/2].repl_bio;
 		} else {
-			rdev = conf->mirrors[d].rdev;
+			rdev = rcu_dereference(conf->mirrors[d].rdev);
 			b = r10_bio->devs[s/2].bio;
 		}
-		if (!rdev || test_bit(Faulty, &rdev->flags))
+		if (!rdev || test_bit(Faulty, &rdev->flags)) {
+			rcu_read_unlock();
 			continue;
+		}
+		atomic_inc(&rdev->nr_pending);
+		rcu_read_unlock();
 		md_sync_acct(b->bi_bdev, r10_bio->sectors);
 		atomic_inc(&r10_bio->remaining);
 		b->bi_next = NULL;
@@ -4507,9 +4553,10 @@ static int handle_reshape_read_error(struct mddev *mddev,
 		if (s > (PAGE_SIZE >> 9))
 			s = PAGE_SIZE >> 9;

+		rcu_read_lock();
 		while (!success) {
 			int d = r10b->devs[slot].devnum;
-			struct md_rdev *rdev = conf->mirrors[d].rdev;
+			struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev);
 			sector_t addr;
 			if (rdev == NULL ||
 			    test_bit(Faulty, &rdev->flags) ||
@@ -4517,11 +4564,15 @@ static int handle_reshape_read_error(struct mddev *mddev,
 				goto failed;

 			addr = r10b->devs[slot].addr + idx * PAGE_SIZE;
+			atomic_inc(&rdev->nr_pending);
+			rcu_read_unlock();
 			success = sync_page_io(rdev,
 					       addr,
 					       s << 9,
 					       bvec[idx].bv_page,
 					       REQ_OP_READ, 0, false);
+			rdev_dec_pending(rdev, mddev);
+			rcu_read_lock();
 			if (success)
 				break;
 failed:
@@ -4531,6 +4582,7 @@ static int handle_reshape_read_error(struct mddev *mddev,
 			if (slot == first_slot)
 				break;
 		}
+		rcu_read_unlock();
 		if (!success) {
 			/* couldn't read this block, must give up */
 			set_bit(MD_RECOVERY_INTR,
@@ -4600,16 +4652,18 @@ static void raid10_finish_reshape(struct mddev *mddev)
 		}
 	} else {
 		int d;
+		rcu_read_lock();
 		for (d = conf->geo.raid_disks ;
 		     d < conf->geo.raid_disks - mddev->delta_disks;
 		     d++) {
-			struct md_rdev *rdev = conf->mirrors[d].rdev;
+			struct md_rdev *rdev = rcu_dereference(conf->mirrors[d].rdev);
 			if (rdev)
 				clear_bit(In_sync, &rdev->flags);
-			rdev = conf->mirrors[d].replacement;
+			rdev = rcu_dereference(conf->mirrors[d].replacement);
 			if (rdev)
 				clear_bit(In_sync, &rdev->flags);
 		}
+		rcu_read_unlock();
 	}
 	mddev->layout = mddev->new_layout;
 	mddev->chunk_sectors = 1 << conf->geo.chunk_shift;
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
@@ -64,10 +64,11 @@ struct r10conf {
 	int			pending_count;

 	spinlock_t		resync_lock;
-	int			nr_pending;
+	atomic_t		nr_pending;
 	int			nr_waiting;
 	int			nr_queued;
 	int			barrier;
+	int			array_freeze_pending;
 	sector_t		next_resync;
 	int			fullsync;	/* set to 1 if a full sync is needed,
 						 * (fresh device added).
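The raid10.h change above turns nr_pending into an atomic_t so the hot allow_barrier() path (see the wait_barrier()/allow_barrier() hunks earlier) can drop the resync_lock spinlock and simply wake barrier waiters when the count reaches zero. A compressed C11-atomics model of that accounting — names are illustrative and the kernel's wait queue is left out:

#include <stdatomic.h>
#include <stdio.h>

static atomic_int nr_pending;

static void start_io(void)		/* wait_barrier() tail */
{
	atomic_fetch_add(&nr_pending, 1);	/* was nr_pending++ under the lock */
}

static int end_io(void)			/* allow_barrier() */
{
	/* atomic_dec_and_test() equivalent: true exactly when we hit
	 * zero, which is when a waiting resync barrier must be woken */
	return atomic_fetch_sub(&nr_pending, 1) == 1;
}

int main(void)
{
	start_io();
	start_io();
	printf("wake after 1st end_io: %d\n", end_io());	/* 0 */
	printf("wake after 2nd end_io: %d\n", end_io());	/* 1 */
	return 0;
}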
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
@@ -3080,7 +3080,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
 			struct md_rdev *rdev;
 			rcu_read_lock();
 			rdev = rcu_dereference(conf->disks[i].rdev);
-			if (rdev && test_bit(In_sync, &rdev->flags))
+			if (rdev && test_bit(In_sync, &rdev->flags) &&
+			    !test_bit(Faulty, &rdev->flags))
 				atomic_inc(&rdev->nr_pending);
 			else
 				rdev = NULL;
@@ -3210,15 +3211,16 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
 	/* During recovery devices cannot be removed, so
 	 * locking and refcounting of rdevs is not needed
 	 */
+	rcu_read_lock();
 	for (i = 0; i < conf->raid_disks; i++) {
-		struct md_rdev *rdev = conf->disks[i].rdev;
+		struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
 		if (rdev
 		    && !test_bit(Faulty, &rdev->flags)
 		    && !test_bit(In_sync, &rdev->flags)
 		    && !rdev_set_badblocks(rdev, sh->sector,
 					   STRIPE_SECTORS, 0))
 			abort = 1;
-		rdev = conf->disks[i].replacement;
+		rdev = rcu_dereference(conf->disks[i].replacement);
 		if (rdev
 		    && !test_bit(Faulty, &rdev->flags)
 		    && !test_bit(In_sync, &rdev->flags)
@@ -3226,6 +3228,7 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
 					   STRIPE_SECTORS, 0))
 			abort = 1;
 	}
+	rcu_read_unlock();
 	if (abort)
 		conf->recovery_disabled =
 			conf->mddev->recovery_disabled;
@@ -3237,15 +3240,16 @@ static int want_replace(struct stripe_head *sh, int disk_idx)
 {
 	struct md_rdev *rdev;
 	int rv = 0;
-	/* Doing recovery so rcu locking not required */
-	rdev = sh->raid_conf->disks[disk_idx].replacement;
+
+	rcu_read_lock();
+	rdev = rcu_dereference(sh->raid_conf->disks[disk_idx].replacement);
 	if (rdev
 	    && !test_bit(Faulty, &rdev->flags)
 	    && !test_bit(In_sync, &rdev->flags)
 	    && (rdev->recovery_offset <= sh->sector
 		|| rdev->mddev->recovery_cp <= sh->sector))
 		rv = 1;
+	rcu_read_unlock();
 	return rv;
 }
@@ -3600,7 +3604,7 @@ static void handle_stripe_dirtying(struct r5conf *conf,
 	pr_debug("for sector %llu, rmw=%d rcw=%d\n",
 		 (unsigned long long)sh->sector, rmw, rcw);
 	set_bit(STRIPE_HANDLE, &sh->state);
-	if ((rmw < rcw || (rmw == rcw && conf->rmw_level == PARITY_ENABLE_RMW)) && rmw > 0) {
+	if ((rmw < rcw || (rmw == rcw && conf->rmw_level == PARITY_PREFER_RMW)) && rmw > 0) {
 		/* prefer read-modify-write, but need to get some data */
 		if (conf->mddev->queue)
 			blk_add_trace_msg(conf->mddev->queue,
@@ -3627,7 +3631,7 @@ static void handle_stripe_dirtying(struct r5conf *conf,
 			}
 		}
 	}
-	if ((rcw < rmw || (rcw == rmw && conf->rmw_level != PARITY_ENABLE_RMW)) && rcw > 0) {
+	if ((rcw < rmw || (rcw == rmw && conf->rmw_level != PARITY_PREFER_RMW)) && rcw > 0) {
 		/* want reconstruct write, but need to get some data */
 		int qread =0;
 		rcw = 0;
@@ -7066,10 +7070,12 @@ static void raid5_status(struct seq_file *seq, struct mddev *mddev)
 	seq_printf(seq, " level %d, %dk chunk, algorithm %d", mddev->level,
 		conf->chunk_sectors / 2, mddev->layout);
 	seq_printf (seq, " [%d/%d] [", conf->raid_disks, conf->raid_disks - mddev->degraded);
-	for (i = 0; i < conf->raid_disks; i++)
-		seq_printf (seq, "%s",
-			       conf->disks[i].rdev &&
-			       test_bit(In_sync, &conf->disks[i].rdev->flags) ? "U" : "_");
+	rcu_read_lock();
+	for (i = 0; i < conf->raid_disks; i++) {
+		struct md_rdev *rdev = rcu_dereference(conf->disks[i].rdev);
+		seq_printf (seq, "%s", rdev && test_bit(In_sync, &rdev->flags) ? "U" : "_");
+	}
+	rcu_read_unlock();
 	seq_printf (seq, "]");
 }
@@ -7191,12 +7197,15 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
 		goto abort;
 	}
 	*rdevp = NULL;
-	synchronize_rcu();
-	if (atomic_read(&rdev->nr_pending)) {
-		/* lost the race, try later */
-		err = -EBUSY;
-		*rdevp = rdev;
-	} else if (p->replacement) {
+	if (!test_bit(RemoveSynchronized, &rdev->flags)) {
+		synchronize_rcu();
+		if (atomic_read(&rdev->nr_pending)) {
+			/* lost the race, try later */
+			err = -EBUSY;
+			*rdevp = rdev;
+		}
+	}
+	if (p->replacement) {
 		/* We must have just cleared 'rdev' */
 		p->rdev = p->replacement;
 		clear_bit(Replacement, &p->replacement->flags);