md/raid5: ensure sync and DISCARD don't happen at the same time.
A number of problems can occur due to races between
resync/recovery and discard.
- if sync_request calls handle_stripe() while a discard is
happening on the stripe, it might call handle_stripe_clean_event
before all of the individual discard requests have completed
(so some devices are still locked, but not all).
Since commit ca64cae960
md/raid5: Make sure we clear R5_Discard when discard is finished.
this will cause R5_Discard to be cleared for the parity device,
so handle_stripe_clean_event() will not be called when the other
devices do become unlocked, so their ->written will not be cleared.
This ultimately leads to a WARN_ON in init_stripe and a lock-up.
- If handle_stripe_clean_event() does clear R5_UPTODATE at an awkward
time for resync, it can lead to s->uptodate being less than disks
in handle_parity_checks5(), which triggers a BUG (because it is
one).
So:
- keep R5_Discard on the parity device until all other devices have
completed their discard request
- make sure we don't try to have a 'discard' and a 'sync' action at
the same time.
This involves a new stripe flag to we know when a 'discard' is
happening, and the use of R5_Overlap on the parity disk so when a
discard is wanted while a sync is active, so we know to wake up
the discard at the appropriate time.
Discard support for RAID5 was added in 3.7, so this is suitable for
any -stable kernel since 3.7.
Cc: stable@vger.kernel.org (v3.7+)
Reported-by: Jes Sorensen <Jes.Sorensen@redhat.com>
Tested-by: Jes Sorensen <Jes.Sorensen@redhat.com>
Signed-off-by: NeilBrown <neilb@suse.de>
This commit is contained in:
parent
90584fc93d
commit
f8dfcffd04
|
@ -2576,6 +2576,8 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
clear_bit(STRIPE_SYNCING, &sh->state);
|
clear_bit(STRIPE_SYNCING, &sh->state);
|
||||||
|
if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
|
||||||
|
wake_up(&conf->wait_for_overlap);
|
||||||
s->syncing = 0;
|
s->syncing = 0;
|
||||||
s->replacing = 0;
|
s->replacing = 0;
|
||||||
/* There is nothing more to do for sync/check/repair.
|
/* There is nothing more to do for sync/check/repair.
|
||||||
|
@ -2749,6 +2751,7 @@ static void handle_stripe_clean_event(struct r5conf *conf,
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
struct r5dev *dev;
|
struct r5dev *dev;
|
||||||
|
int discard_pending = 0;
|
||||||
|
|
||||||
for (i = disks; i--; )
|
for (i = disks; i--; )
|
||||||
if (sh->dev[i].written) {
|
if (sh->dev[i].written) {
|
||||||
|
@ -2777,9 +2780,23 @@ static void handle_stripe_clean_event(struct r5conf *conf,
|
||||||
STRIPE_SECTORS,
|
STRIPE_SECTORS,
|
||||||
!test_bit(STRIPE_DEGRADED, &sh->state),
|
!test_bit(STRIPE_DEGRADED, &sh->state),
|
||||||
0);
|
0);
|
||||||
}
|
} else if (test_bit(R5_Discard, &dev->flags))
|
||||||
} else if (test_bit(R5_Discard, &sh->dev[i].flags))
|
discard_pending = 1;
|
||||||
clear_bit(R5_Discard, &sh->dev[i].flags);
|
}
|
||||||
|
if (!discard_pending &&
|
||||||
|
test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) {
|
||||||
|
clear_bit(R5_Discard, &sh->dev[sh->pd_idx].flags);
|
||||||
|
clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
|
||||||
|
if (sh->qd_idx >= 0) {
|
||||||
|
clear_bit(R5_Discard, &sh->dev[sh->qd_idx].flags);
|
||||||
|
clear_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags);
|
||||||
|
}
|
||||||
|
/* now that discard is done we can proceed with any sync */
|
||||||
|
clear_bit(STRIPE_DISCARD, &sh->state);
|
||||||
|
if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state))
|
||||||
|
set_bit(STRIPE_HANDLE, &sh->state);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
|
if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
|
||||||
if (atomic_dec_and_test(&conf->pending_full_writes))
|
if (atomic_dec_and_test(&conf->pending_full_writes))
|
||||||
|
@ -3431,9 +3448,15 @@ static void handle_stripe(struct stripe_head *sh)
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
|
if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
|
||||||
set_bit(STRIPE_SYNCING, &sh->state);
|
spin_lock(&sh->stripe_lock);
|
||||||
clear_bit(STRIPE_INSYNC, &sh->state);
|
/* Cannot process 'sync' concurrently with 'discard' */
|
||||||
|
if (!test_bit(STRIPE_DISCARD, &sh->state) &&
|
||||||
|
test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) {
|
||||||
|
set_bit(STRIPE_SYNCING, &sh->state);
|
||||||
|
clear_bit(STRIPE_INSYNC, &sh->state);
|
||||||
|
}
|
||||||
|
spin_unlock(&sh->stripe_lock);
|
||||||
}
|
}
|
||||||
clear_bit(STRIPE_DELAYED, &sh->state);
|
clear_bit(STRIPE_DELAYED, &sh->state);
|
||||||
|
|
||||||
|
@ -3593,6 +3616,8 @@ static void handle_stripe(struct stripe_head *sh)
|
||||||
test_bit(STRIPE_INSYNC, &sh->state)) {
|
test_bit(STRIPE_INSYNC, &sh->state)) {
|
||||||
md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
|
md_done_sync(conf->mddev, STRIPE_SECTORS, 1);
|
||||||
clear_bit(STRIPE_SYNCING, &sh->state);
|
clear_bit(STRIPE_SYNCING, &sh->state);
|
||||||
|
if (test_and_clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags))
|
||||||
|
wake_up(&conf->wait_for_overlap);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* If the failed drives are just a ReadError, then we might need
|
/* If the failed drives are just a ReadError, then we might need
|
||||||
|
@ -4159,6 +4184,13 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
|
||||||
sh = get_active_stripe(conf, logical_sector, 0, 0, 0);
|
sh = get_active_stripe(conf, logical_sector, 0, 0, 0);
|
||||||
prepare_to_wait(&conf->wait_for_overlap, &w,
|
prepare_to_wait(&conf->wait_for_overlap, &w,
|
||||||
TASK_UNINTERRUPTIBLE);
|
TASK_UNINTERRUPTIBLE);
|
||||||
|
set_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
|
||||||
|
if (test_bit(STRIPE_SYNCING, &sh->state)) {
|
||||||
|
release_stripe(sh);
|
||||||
|
schedule();
|
||||||
|
goto again;
|
||||||
|
}
|
||||||
|
clear_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags);
|
||||||
spin_lock_irq(&sh->stripe_lock);
|
spin_lock_irq(&sh->stripe_lock);
|
||||||
for (d = 0; d < conf->raid_disks; d++) {
|
for (d = 0; d < conf->raid_disks; d++) {
|
||||||
if (d == sh->pd_idx || d == sh->qd_idx)
|
if (d == sh->pd_idx || d == sh->qd_idx)
|
||||||
|
@ -4171,6 +4203,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
|
||||||
goto again;
|
goto again;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
set_bit(STRIPE_DISCARD, &sh->state);
|
||||||
finish_wait(&conf->wait_for_overlap, &w);
|
finish_wait(&conf->wait_for_overlap, &w);
|
||||||
for (d = 0; d < conf->raid_disks; d++) {
|
for (d = 0; d < conf->raid_disks; d++) {
|
||||||
if (d == sh->pd_idx || d == sh->qd_idx)
|
if (d == sh->pd_idx || d == sh->qd_idx)
|
||||||
|
|
|
@ -323,6 +323,7 @@ enum {
|
||||||
STRIPE_COMPUTE_RUN,
|
STRIPE_COMPUTE_RUN,
|
||||||
STRIPE_OPS_REQ_PENDING,
|
STRIPE_OPS_REQ_PENDING,
|
||||||
STRIPE_ON_UNPLUG_LIST,
|
STRIPE_ON_UNPLUG_LIST,
|
||||||
|
STRIPE_DISCARD,
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
Loading…
Reference in New Issue