dm raid: fix failed takeover/reshapes by keeping raid set frozen
Superblock updates were bogus, causing some takeovers/reshapes to fail.

Introduce a new runtime flag (RT_FLAG_KEEP_RS_FROZEN) to keep a raid set frozen when a layout change was requested. Userspace will immediately reload the table without the flags requesting such a change once they have made it to the superblocks, and any change of the recovery/reshape offsets has to be avoided until after they have been read.

Signed-off-by: Heinz Mauelshagen <heinzm@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
commit 6e20902e8f
parent 4257e085e2
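As a quick illustration of the behavior this commit introduces, the following is a small self-contained userspace sketch (not kernel code): resume only unfreezes recovery when RT_FLAG_KEEP_RS_FROZEN is not set, so a requested takeover/reshape keeps the set frozen until userspace reloads the table without the layout-change flags. The set_flag()/test_flag() helpers and the resume() function below are simplified stand-ins for the kernel's set_bit()/test_bit() and for the tail of raid_resume(); only the RT_FLAG_* names are taken from the diff.

/*
 * Illustrative userspace model only; RT_FLAG_* values match the diff,
 * everything else is a simplified stand-in for the kernel primitives.
 */
#include <stdbool.h>
#include <stdio.h>

#define RT_FLAG_UPDATE_SBS	3
#define RT_FLAG_KEEP_RS_FROZEN	5	/* new flag introduced by this commit */

static void set_flag(unsigned long *flags, int bit)
{
	*flags |= 1UL << bit;
}

static bool test_flag(unsigned long flags, int bit)
{
	return flags & (1UL << bit);
}

/* Mimics the tail of raid_resume(): recovery stays frozen while the flag is set. */
static void resume(unsigned long runtime_flags, bool *recovery_frozen)
{
	if (!test_flag(runtime_flags, RT_FLAG_KEEP_RS_FROZEN))
		*recovery_frozen = false;
}

int main(void)
{
	unsigned long runtime_flags = 0;
	bool recovery_frozen = true;

	/* ctr saw a takeover/reshape request: keep the raid set frozen */
	set_flag(&runtime_flags, RT_FLAG_UPDATE_SBS);
	set_flag(&runtime_flags, RT_FLAG_KEEP_RS_FROZEN);
	resume(runtime_flags, &recovery_frozen);
	printf("after takeover resume: frozen=%d (expected 1)\n", recovery_frozen);

	/* userspace reloaded the table without the layout-change flags */
	runtime_flags = 0;
	resume(runtime_flags, &recovery_frozen);
	printf("after reload resume:   frozen=%d (expected 0)\n", recovery_frozen);
	return 0;
}

In the real code the same decision is made in raid_resume() against rs->runtime_flags, and the flag is set in raid_ctr() whenever a takeover or reshape is requested, as the diff below shows.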
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -190,6 +190,7 @@ struct raid_dev {
 #define RT_FLAG_RS_BITMAP_LOADED	2
 #define RT_FLAG_UPDATE_SBS		3
 #define RT_FLAG_RESHAPE_RS		4
+#define RT_FLAG_KEEP_RS_FROZEN		5
 
 /* Array elements of 64 bit needed for rebuild/write_mostly bits */
 #define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8)
@@ -2727,6 +2728,7 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
 			return r;
 
 		set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
+		set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags);
 		rs_set_new(rs);
 	} else if (rs_reshape_requested(rs)) {
 		if (rs_is_reshaping(rs)) {
@@ -2767,13 +2769,19 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
 		 * Would cause allocations in raid1->check_reshape
 		 * though, thus more issues with potential failures
 		 */
-		else if (rs_is_raid1(rs))
+		else if (rs_is_raid1(rs)) {
+			set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags);
 			rs->md.raid_disks = rs->raid_disks;
+		}
+
+		if (test_bit(RT_FLAG_RESHAPE_RS, &rs->runtime_flags)) {
+			set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
+			set_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags);
+		}
 
 		if (rs->md.raid_disks < rs->raid_disks)
 			set_bit(MD_ARRAY_FIRST_USE, &rs->md.flags);
 
-		set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags);
 		rs_set_cur(rs);
 	} else
 		rs_set_cur(rs);
@@ -3231,9 +3239,11 @@ static void raid_postsuspend(struct dm_target *ti)
 {
 	struct raid_set *rs = ti->private;
 
-	mddev_suspend(&rs->md);
-	rs->md.ro = 1;
-	clear_bit(RT_FLAG_RS_RESUMED, &rs->runtime_flags);
+	if (test_and_clear_bit(RT_FLAG_RS_RESUMED, &rs->runtime_flags)) {
+		if (!rs->md.suspended)
+			mddev_suspend(&rs->md);
+		rs->md.ro = 1;
+	}
 }
 
 static void attempt_restore_of_faulty_devices(struct raid_set *rs)
@@ -3308,6 +3318,18 @@ static int __load_dirty_region_bitmap(struct raid_set *rs)
 	return r;
 }
 
+/* Enforce updating all superblocks */
+static void rs_update_sbs(struct raid_set *rs)
+{
+	struct mddev *mddev = &rs->md;
+	int ro = mddev->ro;
+
+	set_bit(MD_CHANGE_DEVS, &mddev->flags);
+	mddev->ro = 0;
+	md_update_sb(mddev, 1);
+	mddev->ro = ro;
+}
+
 /*
  * Reshape changes raid algorithm of @rs to new one within personality
  * (e.g. raid6_zr -> raid6_nc), changes stripe size, adds/removes
@@ -3356,9 +3378,12 @@ static int rs_start_reshape(struct raid_set *rs)
 	if (!mddev->suspended)
 		mddev_suspend(mddev);
 
-	mddev->ro = 0;
-	md_update_sb(mddev, 1);
-	mddev->ro = 1;
+	/*
+	 * Now reshape got set up, update superblocks to
+	 * reflect the fact so that a table reload will
+	 * access proper superblock content in the ctr.
+	 */
+	rs_update_sbs(rs);
 
 	return 0;
 }
@@ -3375,22 +3400,12 @@ static int raid_preresume(struct dm_target *ti)
 
 	/*
 	 * The superblocks need to be updated on disk if the
-	 * array is new or __load_dirty_region_bitmap will overwrite them
-	 * in core with old data.
-	 *
-	 * In case the array got modified (takeover/reshape/resize)
-	 * or the data offsets on the component devices changed, they
-	 * have to be updated as well.
-	 *
-	 * Have to switch to readwrite and back in order to
-	 * allow for the superblock updates.
+	 * array is new or new devices got added (thus zeroed
+	 * out by userspace) or __load_dirty_region_bitmap
+	 * will overwrite them in core with old data or fail.
 	 */
-	if (test_and_clear_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags)) {
-		set_bit(MD_CHANGE_DEVS, &mddev->flags);
-		mddev->ro = 0;
-		md_update_sb(mddev, 1);
-		mddev->ro = 1;
-	}
+	if (test_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags))
+		rs_update_sbs(rs);
 
 	/*
 	 * Disable/enable discard support on raid set after any
@@ -3449,14 +3464,26 @@ static void raid_resume(struct dm_target *ti)
 		 * devices are reachable again.
 		 */
 		attempt_restore_of_faulty_devices(rs);
+	} else {
+		mddev->ro = 0;
+		mddev->in_sync = 0;
+
+		/*
+		 * When passing in flags to the ctr, we expect userspace
+		 * to reset them because they made it to the superblocks
+		 * and reload the mapping anyway.
+		 *
+		 * -> only unfreeze recovery in case of a table reload or
+		 *    we'll have a bogus recovery/reshape position
+		 *    retrieved from the superblock by the ctr because
+		 *    the ongoing recovery/reshape will change it after read.
+		 */
+		if (!test_bit(RT_FLAG_KEEP_RS_FROZEN, &rs->runtime_flags))
+			clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+
+		if (mddev->suspended)
+			mddev_resume(mddev);
 	}
-
-	mddev->ro = 0;
-	mddev->in_sync = 0;
-	clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-
-	if (mddev->suspended)
-		mddev_resume(mddev);
 }
 
 static struct target_type raid_target = {