md/raid5: fix handling of bad blocks during recovery.
1/ We can only treat a known-bad-block like a read-error if we have the data that belongs in that block. So fix that test. 2/ If we cannot recovery a stripe due to insufficient data, don't tell "md_done_sync" that the sync failed unless we really did fail something. If we successfully record bad blocks, that is success. Reported-by: "majianpeng" <majianpeng@gmail.com> Signed-off-by: NeilBrown <neilb@suse.de>
This commit is contained in:
parent
5220ea1e64
commit
18b9837ea0
|
@ -2471,39 +2471,41 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
|
|||
int abort = 0;
|
||||
int i;
|
||||
|
||||
md_done_sync(conf->mddev, STRIPE_SECTORS, 0);
|
||||
clear_bit(STRIPE_SYNCING, &sh->state);
|
||||
s->syncing = 0;
|
||||
s->replacing = 0;
|
||||
/* There is nothing more to do for sync/check/repair.
|
||||
* Don't even need to abort as that is handled elsewhere
|
||||
* if needed, and not always wanted e.g. if there is a known
|
||||
* bad block here.
|
||||
* For recover/replace we need to record a bad block on all
|
||||
* non-sync devices, or abort the recovery
|
||||
*/
|
||||
if (!test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery))
|
||||
return;
|
||||
/* During recovery devices cannot be removed, so locking and
|
||||
* refcounting of rdevs is not needed
|
||||
*/
|
||||
for (i = 0; i < conf->raid_disks; i++) {
|
||||
struct md_rdev *rdev = conf->disks[i].rdev;
|
||||
if (rdev
|
||||
&& !test_bit(Faulty, &rdev->flags)
|
||||
&& !test_bit(In_sync, &rdev->flags)
|
||||
&& !rdev_set_badblocks(rdev, sh->sector,
|
||||
STRIPE_SECTORS, 0))
|
||||
abort = 1;
|
||||
rdev = conf->disks[i].replacement;
|
||||
if (rdev
|
||||
&& !test_bit(Faulty, &rdev->flags)
|
||||
&& !test_bit(In_sync, &rdev->flags)
|
||||
&& !rdev_set_badblocks(rdev, sh->sector,
|
||||
STRIPE_SECTORS, 0))
|
||||
abort = 1;
|
||||
}
|
||||
if (abort) {
|
||||
conf->recovery_disabled = conf->mddev->recovery_disabled;
|
||||
set_bit(MD_RECOVERY_INTR, &conf->mddev->recovery);
|
||||
if (test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery)) {
|
||||
/* During recovery devices cannot be removed, so
|
||||
* locking and refcounting of rdevs is not needed
|
||||
*/
|
||||
for (i = 0; i < conf->raid_disks; i++) {
|
||||
struct md_rdev *rdev = conf->disks[i].rdev;
|
||||
if (rdev
|
||||
&& !test_bit(Faulty, &rdev->flags)
|
||||
&& !test_bit(In_sync, &rdev->flags)
|
||||
&& !rdev_set_badblocks(rdev, sh->sector,
|
||||
STRIPE_SECTORS, 0))
|
||||
abort = 1;
|
||||
rdev = conf->disks[i].replacement;
|
||||
if (rdev
|
||||
&& !test_bit(Faulty, &rdev->flags)
|
||||
&& !test_bit(In_sync, &rdev->flags)
|
||||
&& !rdev_set_badblocks(rdev, sh->sector,
|
||||
STRIPE_SECTORS, 0))
|
||||
abort = 1;
|
||||
}
|
||||
if (abort)
|
||||
conf->recovery_disabled =
|
||||
conf->mddev->recovery_disabled;
|
||||
}
|
||||
md_done_sync(conf->mddev, STRIPE_SECTORS, !abort);
|
||||
}
|
||||
|
||||
static int want_replace(struct stripe_head *sh, int disk_idx)
|
||||
|
@ -3203,7 +3205,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
|
|||
/* Not in-sync */;
|
||||
else if (is_bad) {
|
||||
/* also not in-sync */
|
||||
if (!test_bit(WriteErrorSeen, &rdev->flags)) {
|
||||
if (!test_bit(WriteErrorSeen, &rdev->flags) &&
|
||||
test_bit(R5_UPTODATE, &dev->flags)) {
|
||||
/* treat as in-sync, but with a read error
|
||||
* which we can now try to correct
|
||||
*/
|
||||
|
|
Loading…
Reference in New Issue