Assorted md fixes for 3.10
A mixed bad of little fixes. No real new functionality here. Several patches are tagged for -stable. -----BEGIN PGP SIGNATURE----- Version: GnuPG v2.0.19 (GNU/Linux) iQIVAwUAUYBTNTnsnt1WYoG5AQKn2w/7BQDRB5pNfOe+Omb7cwO8QKIWIn2RvJ1H 8ki0+folaqX9OOpWZUb1KEKhOnJSMPL6c6NS5a358aQRgiKJBRQ5Qoe4nz9v7BuY SQnO8VjNrsHeEZ7iH1krtx0yNUklinO5j5HxEnNp7cL3m1HVxmS7hKJOlc9LYz9R KLqOduBuhLHUSQnnzU52LiQ9Y7Au5eUtmwFG2SF+u4DIN+lyKfD45Y4PR2Um/hPe 7QdR2GOX75YMgAWjq3kafodrhJFNzxSoFuexjRQeGC4bTn1JLGK6Uu/GER8VSnS+ FHoet85W5rg12NtJjC3qlfxahfmBYThsi00RB4EuNZxto3is0o8NwW6bnNjKSjHh wRjXUOr/ANnCYXjPhffjIvZERMyma/7GrjG20cxDmj/iJnucI1oprfzFHx7RAOKo gcBFVJiPLYzXZJkyU6nA4bjvWk/KYM/a5nzzaBsU9KObmgqL7H7SJTh8nKrTNhDr M4AkC3DIVlDaNoSloFRtyLXylrHT5KBBBBklWGFZiauicaLPbC8asElFVZjsRyTL 45ZLTmyFXHtVn6HIdY8JOtGR+iKay/EHFN62TKH8AMmCQY+JKlaQdDXFbqzZ29Gz n5pjA8/+9ddP9P+f8xC8P29mk7HkvujJH3gOepU78b8GIIzBXMKc7NZVHhPVv7f4 N6u7i5uxgxc= =Z9Ws -----END PGP SIGNATURE----- Merge tag 'md-3.10' of git://neil.brown.name/md Pull md fixes from NeilBrown: "A mixed bag of little fixes. No real new functionality here. Several patches are tagged for -stable." * tag 'md-3.10' of git://neil.brown.name/md: MD: ignore discard request for hard disks of hybid raid1/raid10 array md: bad block list should default to disabled. md: raid1/raid10 md devices leak memory when stopping DM RAID: Add message/status support for changing sync action MD: Export 'md_reap_sync_thread' function md: don't update metadata when stopping a read-only array. md: Allow devices to be re-added to a read-only array. md/raid10: Allow skipping recovery when clean arrays are assembled MD: Fix typos in MD documentation md/raid5: avoid an extra write when writing to a known-bad-block. md/raid5: Change or of some order to improve efficiency. md: use set_bit_le and clear_bit_le md: HOT_DISK_REMOVE shouldn't make a read-auto device active. md: use common code for all calls to ->hot_remove_disk() md: never update metadata when array is read-only.
This commit is contained in:
commit
f1e9a236e5
|
@ -1,10 +1,13 @@
|
|||
dm-raid
|
||||
-------
|
||||
=======
|
||||
|
||||
The device-mapper RAID (dm-raid) target provides a bridge from DM to MD.
|
||||
It allows the MD RAID drivers to be accessed using a device-mapper
|
||||
interface.
|
||||
|
||||
|
||||
Mapping Table Interface
|
||||
-----------------------
|
||||
The target is named "raid" and it accepts the following parameters:
|
||||
|
||||
<raid_type> <#raid_params> <raid_params> \
|
||||
|
@ -47,7 +50,7 @@ The target is named "raid" and it accepts the following parameters:
|
|||
followed by optional parameters (in any order):
|
||||
[sync|nosync] Force or prevent RAID initialization.
|
||||
|
||||
[rebuild <idx>] Rebuild drive number idx (first drive is 0).
|
||||
[rebuild <idx>] Rebuild drive number 'idx' (first drive is 0).
|
||||
|
||||
[daemon_sleep <ms>]
|
||||
Interval between runs of the bitmap daemon that
|
||||
|
@ -56,9 +59,9 @@ The target is named "raid" and it accepts the following parameters:
|
|||
|
||||
[min_recovery_rate <kB/sec/disk>] Throttle RAID initialization
|
||||
[max_recovery_rate <kB/sec/disk>] Throttle RAID initialization
|
||||
[write_mostly <idx>] Drive index is write-mostly
|
||||
[max_write_behind <sectors>] See '-write-behind=' (man mdadm)
|
||||
[stripe_cache <sectors>] Stripe cache size (higher RAIDs only)
|
||||
[write_mostly <idx>] Mark drive index 'idx' write-mostly.
|
||||
[max_write_behind <sectors>] See '--write-behind=' (man mdadm)
|
||||
[stripe_cache <sectors>] Stripe cache size (RAID 4/5/6 only)
|
||||
[region_size <sectors>]
|
||||
The region_size multiplied by the number of regions is the
|
||||
logical size of the array. The bitmap records the device
|
||||
|
@ -122,7 +125,7 @@ The target is named "raid" and it accepts the following parameters:
|
|||
given for both the metadata and data drives for a given position.
|
||||
|
||||
|
||||
Example tables
|
||||
Example Tables
|
||||
--------------
|
||||
# RAID4 - 4 data drives, 1 parity (no metadata devices)
|
||||
# No metadata devices specified to hold superblock/bitmap info
|
||||
|
@ -141,26 +144,70 @@ Example tables
|
|||
raid4 4 2048 sync min_recovery_rate 20 \
|
||||
5 8:17 8:18 8:33 8:34 8:49 8:50 8:65 8:66 8:81 8:82
|
||||
|
||||
|
||||
Status Output
|
||||
-------------
|
||||
'dmsetup table' displays the table used to construct the mapping.
|
||||
The optional parameters are always printed in the order listed
|
||||
above with "sync" or "nosync" always output ahead of the other
|
||||
arguments, regardless of the order used when originally loading the table.
|
||||
Arguments that can be repeated are ordered by value.
|
||||
|
||||
'dmsetup status' yields information on the state and health of the
|
||||
array.
|
||||
The output is as follows:
|
||||
|
||||
'dmsetup status' yields information on the state and health of the array.
|
||||
The output is as follows (normally a single line, but expanded here for
|
||||
clarity):
|
||||
1: <s> <l> raid \
|
||||
2: <raid_type> <#devices> <1 health char for each dev> <resync_ratio>
|
||||
2: <raid_type> <#devices> <health_chars> \
|
||||
3: <sync_ratio> <sync_action> <mismatch_cnt>
|
||||
|
||||
Line 1 is the standard output produced by device-mapper.
|
||||
Line 2 is produced by the raid target, and best explained by example:
|
||||
0 1960893648 raid raid4 5 AAAAA 2/490221568
|
||||
Lines 2 & 3 are produced by the raid target and are best explained by example:
|
||||
0 1960893648 raid raid4 5 AAAAA 2/490221568 init 0
|
||||
Here we can see the RAID type is raid4, there are 5 devices - all of
|
||||
which are 'A'live, and the array is 2/490221568 complete with recovery.
|
||||
Faulty or missing devices are marked 'D'. Devices that are out-of-sync
|
||||
are marked 'a'.
|
||||
which are 'A'live, and the array is 2/490221568 complete with its initial
|
||||
recovery. Here is a fuller description of the individual fields:
|
||||
<raid_type> Same as the <raid_type> used to create the array.
|
||||
<health_chars> One char for each device, indicating: 'A' = alive and
|
||||
in-sync, 'a' = alive but not in-sync, 'D' = dead/failed.
|
||||
<sync_ratio> The ratio indicating how much of the array has undergone
|
||||
the process described by 'sync_action'. If the
|
||||
'sync_action' is "check" or "repair", then the process
|
||||
of "resync" or "recover" can be considered complete.
|
||||
<sync_action> One of the following possible states:
|
||||
idle - No synchronization action is being performed.
|
||||
frozen - The current action has been halted.
|
||||
resync - Array is undergoing its initial synchronization
|
||||
or is resynchronizing after an unclean shutdown
|
||||
(possibly aided by a bitmap).
|
||||
recover - A device in the array is being rebuilt or
|
||||
replaced.
|
||||
check - A user-initiated full check of the array is
|
||||
being performed. All blocks are read and
|
||||
checked for consistency. The number of
|
||||
discrepancies found is recorded in
|
||||
<mismatch_cnt>. No changes are made to the
|
||||
array by this action.
|
||||
repair - The same as "check", but discrepancies are
|
||||
corrected.
|
||||
reshape - The array is undergoing a reshape.
|
||||
<mismatch_cnt> The number of discrepancies found between mirror copies
|
||||
in RAID1/10 or wrong parity values found in RAID4/5/6.
|
||||
This value is valid only after a "check" of the array
|
||||
is performed. A healthy array has a 'mismatch_cnt' of 0.
|
||||
|
||||
Message Interface
|
||||
-----------------
|
||||
The dm-raid target will accept certain actions through the 'message' interface.
|
||||
('man dmsetup' for more information on the message interface.) These actions
|
||||
include:
|
||||
"idle" - Halt the current sync action.
|
||||
"frozen" - Freeze the current sync action.
|
||||
"resync" - Initiate/continue a resync.
|
||||
"recover"- Initiate/continue a recover process.
|
||||
"check" - Initiate a check (i.e. a "scrub") of the array.
|
||||
"repair" - Initiate a repair of the array.
|
||||
"reshape"- Currently unsupported (-EINVAL).
|
||||
|
||||
Version History
|
||||
---------------
|
||||
|
@ -171,4 +218,7 @@ Version History
|
|||
1.3.1 Allow device replacement/rebuild for RAID 10
|
||||
1.3.2 Fix/improve redundancy checking for RAID10
|
||||
1.4.0 Non-functional change. Removes arg from mapping function.
|
||||
1.4.1 Add RAID10 "far" and "offset" algorithm support.
|
||||
1.4.1 RAID10 fix redundancy validation checks (commit 55ebbb5).
|
||||
1.4.2 Add RAID10 "far" and "offset" algorithm support.
|
||||
1.5.0 Add message interface to allow manipulation of the sync_action.
|
||||
New status (STATUSTYPE_INFO) fields: sync_action and mismatch_cnt.
|
||||
|
|
|
@ -119,7 +119,7 @@ device to add.
|
|||
The array is started with the RUN_ARRAY ioctl.
|
||||
|
||||
Once started, new devices can be added. They should have an
|
||||
appropriate superblock written to them, and then passed be in with
|
||||
appropriate superblock written to them, and then be passed in with
|
||||
ADD_NEW_DISK.
|
||||
|
||||
Devices that have failed or are not yet active can be detached from an
|
||||
|
@ -131,7 +131,7 @@ Specific Rules that apply to format-0 super block arrays, and
|
|||
-------------------------------------------------------------
|
||||
|
||||
An array can be 'created' by describing the array (level, chunksize
|
||||
etc) in a SET_ARRAY_INFO ioctl. This must has major_version==0 and
|
||||
etc) in a SET_ARRAY_INFO ioctl. This must have major_version==0 and
|
||||
raid_disks != 0.
|
||||
|
||||
Then uninitialized devices can be added with ADD_NEW_DISK. The
|
||||
|
@ -426,7 +426,7 @@ Each directory contains:
|
|||
offset
|
||||
This gives the location in the device (in sectors from the
|
||||
start) where data from the array will be stored. Any part of
|
||||
the device before this offset us not touched, unless it is
|
||||
the device before this offset is not touched, unless it is
|
||||
used for storing metadata (Formats 1.1 and 1.2).
|
||||
|
||||
size
|
||||
|
@ -440,7 +440,7 @@ Each directory contains:
|
|||
When the device is not 'in_sync', this records the number of
|
||||
sectors from the start of the device which are known to be
|
||||
correct. This is normally zero, but during a recovery
|
||||
operation is will steadily increase, and if the recovery is
|
||||
operation it will steadily increase, and if the recovery is
|
||||
interrupted, restoring this value can cause recovery to
|
||||
avoid repeating the earlier blocks. With v1.x metadata, this
|
||||
value is saved and restored automatically.
|
||||
|
@ -468,7 +468,7 @@ Each directory contains:
|
|||
|
||||
|
||||
|
||||
An active md device will also contain and entry for each active device
|
||||
An active md device will also contain an entry for each active device
|
||||
in the array. These are named
|
||||
|
||||
rdNN
|
||||
|
@ -482,7 +482,7 @@ will show 'in_sync' on every line.
|
|||
|
||||
|
||||
|
||||
Active md devices for levels that support data redundancy (1,4,5,6)
|
||||
Active md devices for levels that support data redundancy (1,4,5,6,10)
|
||||
also have
|
||||
|
||||
sync_action
|
||||
|
@ -494,7 +494,7 @@ also have
|
|||
failed/missing device
|
||||
idle - nothing is happening
|
||||
check - A full check of redundancy was requested and is
|
||||
happening. This reads all block and checks
|
||||
happening. This reads all blocks and checks
|
||||
them. A repair may also happen for some raid
|
||||
levels.
|
||||
repair - A full check and repair is happening. This is
|
||||
|
@ -522,7 +522,7 @@ also have
|
|||
|
||||
degraded
|
||||
This contains a count of the number of devices by which the
|
||||
arrays is degraded. So an optimal array with show '0'. A
|
||||
array is degraded.  So an optimal array will show '0'.  A
|
||||
single failed/missing drive will show '1', etc.
|
||||
This file responds to select/poll; any increase or decrease
|
||||
in the count of missing devices will trigger an event.
|
||||
|
|
|
@ -846,7 +846,7 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
|
|||
if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
|
||||
set_bit(bit, kaddr);
|
||||
else
|
||||
test_and_set_bit_le(bit, kaddr);
|
||||
set_bit_le(bit, kaddr);
|
||||
kunmap_atomic(kaddr);
|
||||
pr_debug("set file bit %lu page %lu\n", bit, page->index);
|
||||
/* record page number so it gets flushed to disk when unplug occurs */
|
||||
|
@ -868,7 +868,7 @@ static void bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
|
|||
if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
|
||||
clear_bit(bit, paddr);
|
||||
else
|
||||
test_and_clear_bit_le(bit, paddr);
|
||||
clear_bit_le(bit, paddr);
|
||||
kunmap_atomic(paddr);
|
||||
if (!test_page_attr(bitmap, page->index, BITMAP_PAGE_NEEDWRITE)) {
|
||||
set_page_attr(bitmap, page->index, BITMAP_PAGE_PENDING);
|
||||
|
|
|
@ -1279,6 +1279,31 @@ static int raid_map(struct dm_target *ti, struct bio *bio)
|
|||
return DM_MAPIO_SUBMITTED;
|
||||
}
|
||||
|
||||
static const char *decipher_sync_action(struct mddev *mddev)
|
||||
{
|
||||
if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
|
||||
return "frozen";
|
||||
|
||||
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
|
||||
(!mddev->ro && test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))) {
|
||||
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
|
||||
return "reshape";
|
||||
|
||||
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
|
||||
if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
|
||||
return "resync";
|
||||
else if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
|
||||
return "check";
|
||||
return "repair";
|
||||
}
|
||||
|
||||
if (test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
|
||||
return "recover";
|
||||
}
|
||||
|
||||
return "idle";
|
||||
}
|
||||
|
||||
static void raid_status(struct dm_target *ti, status_type_t type,
|
||||
unsigned status_flags, char *result, unsigned maxlen)
|
||||
{
|
||||
|
@ -1298,8 +1323,18 @@ static void raid_status(struct dm_target *ti, status_type_t type,
|
|||
sync = rs->md.recovery_cp;
|
||||
|
||||
if (sync >= rs->md.resync_max_sectors) {
|
||||
/*
|
||||
* Sync complete.
|
||||
*/
|
||||
array_in_sync = 1;
|
||||
sync = rs->md.resync_max_sectors;
|
||||
} else if (test_bit(MD_RECOVERY_REQUESTED, &rs->md.recovery)) {
|
||||
/*
|
||||
* If "check" or "repair" is occurring, the array has
|
||||
* undergone an initial sync and the health characters
|
||||
* should not be 'a' anymore.
|
||||
*/
|
||||
array_in_sync = 1;
|
||||
} else {
|
||||
/*
|
||||
* The array may be doing an initial sync, or it may
|
||||
|
@ -1311,6 +1346,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
|
|||
if (!test_bit(In_sync, &rs->dev[i].rdev.flags))
|
||||
array_in_sync = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Status characters:
|
||||
* 'D' = Dead/Failed device
|
||||
|
@ -1339,6 +1375,21 @@ static void raid_status(struct dm_target *ti, status_type_t type,
|
|||
(unsigned long long) sync,
|
||||
(unsigned long long) rs->md.resync_max_sectors);
|
||||
|
||||
/*
|
||||
* Sync action:
|
||||
* See Documentation/device-mapper/dm-raid.txt for
|
||||
* information on each of these states.
|
||||
*/
|
||||
DMEMIT(" %s", decipher_sync_action(&rs->md));
|
||||
|
||||
/*
|
||||
* resync_mismatches/mismatch_cnt
|
||||
* This field shows the number of discrepancies found when
|
||||
* performing a "check" of the array.
|
||||
*/
|
||||
DMEMIT(" %llu",
|
||||
(unsigned long long)
|
||||
atomic64_read(&rs->md.resync_mismatches));
|
||||
break;
|
||||
case STATUSTYPE_TABLE:
|
||||
/* The string you would use to construct this array */
|
||||
|
@ -1425,7 +1476,62 @@ static void raid_status(struct dm_target *ti, status_type_t type,
|
|||
}
|
||||
}
|
||||
|
||||
static int raid_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data)
|
||||
static int raid_message(struct dm_target *ti, unsigned argc, char **argv)
|
||||
{
|
||||
struct raid_set *rs = ti->private;
|
||||
struct mddev *mddev = &rs->md;
|
||||
|
||||
if (!strcasecmp(argv[0], "reshape")) {
|
||||
DMERR("Reshape not supported.");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!mddev->pers || !mddev->pers->sync_request)
|
||||
return -EINVAL;
|
||||
|
||||
if (!strcasecmp(argv[0], "frozen"))
|
||||
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
else
|
||||
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
|
||||
if (!strcasecmp(argv[0], "idle") || !strcasecmp(argv[0], "frozen")) {
|
||||
if (mddev->sync_thread) {
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
md_reap_sync_thread(mddev);
|
||||
}
|
||||
} else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
|
||||
test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
|
||||
return -EBUSY;
|
||||
else if (!strcasecmp(argv[0], "resync"))
|
||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
else if (!strcasecmp(argv[0], "recover")) {
|
||||
set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
|
||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
} else {
|
||||
if (!strcasecmp(argv[0], "check"))
|
||||
set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
|
||||
else if (!!strcasecmp(argv[0], "repair"))
|
||||
return -EINVAL;
|
||||
set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
|
||||
set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
|
||||
}
|
||||
if (mddev->ro == 2) {
|
||||
/* A write to sync_action is enough to justify
|
||||
* canceling read-auto mode
|
||||
*/
|
||||
mddev->ro = 0;
|
||||
if (!mddev->suspended)
|
||||
md_wakeup_thread(mddev->sync_thread);
|
||||
}
|
||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
if (!mddev->suspended)
|
||||
md_wakeup_thread(mddev->thread);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int raid_iterate_devices(struct dm_target *ti,
|
||||
iterate_devices_callout_fn fn, void *data)
|
||||
{
|
||||
struct raid_set *rs = ti->private;
|
||||
unsigned i;
|
||||
|
@ -1482,12 +1588,13 @@ static void raid_resume(struct dm_target *ti)
|
|||
|
||||
static struct target_type raid_target = {
|
||||
.name = "raid",
|
||||
.version = {1, 4, 2},
|
||||
.version = {1, 5, 0},
|
||||
.module = THIS_MODULE,
|
||||
.ctr = raid_ctr,
|
||||
.dtr = raid_dtr,
|
||||
.map = raid_map,
|
||||
.status = raid_status,
|
||||
.message = raid_message,
|
||||
.iterate_devices = raid_iterate_devices,
|
||||
.io_hints = raid_io_hints,
|
||||
.presuspend = raid_presuspend,
|
||||
|
|
237
drivers/md/md.c
237
drivers/md/md.c
|
@ -72,6 +72,9 @@ static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
|
|||
static struct workqueue_struct *md_wq;
|
||||
static struct workqueue_struct *md_misc_wq;
|
||||
|
||||
static int remove_and_add_spares(struct mddev *mddev,
|
||||
struct md_rdev *this);
|
||||
|
||||
#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
|
||||
|
||||
/*
|
||||
|
@ -1564,8 +1567,8 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
|
|||
sector, count, 1) == 0)
|
||||
return -EINVAL;
|
||||
}
|
||||
} else if (sb->bblog_offset == 0)
|
||||
rdev->badblocks.shift = -1;
|
||||
} else if (sb->bblog_offset != 0)
|
||||
rdev->badblocks.shift = 0;
|
||||
|
||||
if (!refdev) {
|
||||
ret = 1;
|
||||
|
@ -2411,6 +2414,11 @@ static void md_update_sb(struct mddev * mddev, int force_change)
|
|||
int nospares = 0;
|
||||
int any_badblocks_changed = 0;
|
||||
|
||||
if (mddev->ro) {
|
||||
if (force_change)
|
||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||
return;
|
||||
}
|
||||
repeat:
|
||||
/* First make sure individual recovery_offsets are correct */
|
||||
rdev_for_each(rdev, mddev) {
|
||||
|
@ -2800,12 +2808,10 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
|
|||
/* personality does all needed checks */
|
||||
if (rdev->mddev->pers->hot_remove_disk == NULL)
|
||||
return -EINVAL;
|
||||
err = rdev->mddev->pers->
|
||||
hot_remove_disk(rdev->mddev, rdev);
|
||||
if (err)
|
||||
return err;
|
||||
sysfs_unlink_rdev(rdev->mddev, rdev);
|
||||
rdev->raid_disk = -1;
|
||||
clear_bit(Blocked, &rdev->flags);
|
||||
remove_and_add_spares(rdev->mddev, rdev);
|
||||
if (rdev->raid_disk >= 0)
|
||||
return -EBUSY;
|
||||
set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
|
||||
md_wakeup_thread(rdev->mddev->thread);
|
||||
} else if (rdev->mddev->pers) {
|
||||
|
@ -3221,7 +3227,7 @@ int md_rdev_init(struct md_rdev *rdev)
|
|||
* be used - I wonder if that matters
|
||||
*/
|
||||
rdev->badblocks.count = 0;
|
||||
rdev->badblocks.shift = 0;
|
||||
rdev->badblocks.shift = -1; /* disabled until explicitly enabled */
|
||||
rdev->badblocks.page = kmalloc(PAGE_SIZE, GFP_KERNEL);
|
||||
seqlock_init(&rdev->badblocks.lock);
|
||||
if (rdev->badblocks.page == NULL)
|
||||
|
@ -3293,9 +3299,6 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe
|
|||
goto abort_free;
|
||||
}
|
||||
}
|
||||
if (super_format == -1)
|
||||
/* hot-add for 0.90, or non-persistent: so no badblocks */
|
||||
rdev->badblocks.shift = -1;
|
||||
|
||||
return rdev;
|
||||
|
||||
|
@ -4225,8 +4228,6 @@ action_show(struct mddev *mddev, char *page)
|
|||
return sprintf(page, "%s\n", type);
|
||||
}
|
||||
|
||||
static void reap_sync_thread(struct mddev *mddev);
|
||||
|
||||
static ssize_t
|
||||
action_store(struct mddev *mddev, const char *page, size_t len)
|
||||
{
|
||||
|
@ -4241,7 +4242,7 @@ action_store(struct mddev *mddev, const char *page, size_t len)
|
|||
if (cmd_match(page, "idle") || cmd_match(page, "frozen")) {
|
||||
if (mddev->sync_thread) {
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
reap_sync_thread(mddev);
|
||||
md_reap_sync_thread(mddev);
|
||||
}
|
||||
} else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) ||
|
||||
test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
|
||||
|
@ -5279,7 +5280,7 @@ static void __md_stop_writes(struct mddev *mddev)
|
|||
if (mddev->sync_thread) {
|
||||
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
|
||||
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
|
||||
reap_sync_thread(mddev);
|
||||
md_reap_sync_thread(mddev);
|
||||
}
|
||||
|
||||
del_timer_sync(&mddev->safemode_timer);
|
||||
|
@ -5287,7 +5288,8 @@ static void __md_stop_writes(struct mddev *mddev)
|
|||
bitmap_flush(mddev);
|
||||
md_super_wait(mddev);
|
||||
|
||||
if (!mddev->in_sync || mddev->flags) {
|
||||
if (mddev->ro == 0 &&
|
||||
(!mddev->in_sync || mddev->flags)) {
|
||||
/* mark array as shutdown cleanly */
|
||||
mddev->in_sync = 1;
|
||||
md_update_sb(mddev, 1);
|
||||
|
@ -5810,7 +5812,7 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
|
|||
else
|
||||
sysfs_notify_dirent_safe(rdev->sysfs_state);
|
||||
|
||||
md_update_sb(mddev, 1);
|
||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||
if (mddev->degraded)
|
||||
set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
|
||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
|
@ -5877,6 +5879,9 @@ static int hot_remove_disk(struct mddev * mddev, dev_t dev)
|
|||
if (!rdev)
|
||||
return -ENXIO;
|
||||
|
||||
clear_bit(Blocked, &rdev->flags);
|
||||
remove_and_add_spares(mddev, rdev);
|
||||
|
||||
if (rdev->raid_disk >= 0)
|
||||
goto busy;
|
||||
|
||||
|
@ -6490,6 +6495,28 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
|
|||
err = md_set_readonly(mddev, bdev);
|
||||
goto done_unlock;
|
||||
|
||||
case HOT_REMOVE_DISK:
|
||||
err = hot_remove_disk(mddev, new_decode_dev(arg));
|
||||
goto done_unlock;
|
||||
|
||||
case ADD_NEW_DISK:
|
||||
/* We can support ADD_NEW_DISK on read-only arrays
|
||||
* only if we are re-adding a preexisting device.
|
||||
* So require mddev->pers and MD_DISK_SYNC.
|
||||
*/
|
||||
if (mddev->pers) {
|
||||
mdu_disk_info_t info;
|
||||
if (copy_from_user(&info, argp, sizeof(info)))
|
||||
err = -EFAULT;
|
||||
else if (!(info.state & (1<<MD_DISK_SYNC)))
|
||||
/* Need to clear read-only for this */
|
||||
break;
|
||||
else
|
||||
err = add_new_disk(mddev, &info);
|
||||
goto done_unlock;
|
||||
}
|
||||
break;
|
||||
|
||||
case BLKROSET:
|
||||
if (get_user(ro, (int __user *)(arg))) {
|
||||
err = -EFAULT;
|
||||
|
@ -6560,10 +6587,6 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
|
|||
goto done_unlock;
|
||||
}
|
||||
|
||||
case HOT_REMOVE_DISK:
|
||||
err = hot_remove_disk(mddev, new_decode_dev(arg));
|
||||
goto done_unlock;
|
||||
|
||||
case HOT_ADD_DISK:
|
||||
err = hot_add_disk(mddev, new_decode_dev(arg));
|
||||
goto done_unlock;
|
||||
|
@ -7644,14 +7667,16 @@ void md_do_sync(struct md_thread *thread)
|
|||
}
|
||||
EXPORT_SYMBOL_GPL(md_do_sync);
|
||||
|
||||
static int remove_and_add_spares(struct mddev *mddev)
|
||||
static int remove_and_add_spares(struct mddev *mddev,
|
||||
struct md_rdev *this)
|
||||
{
|
||||
struct md_rdev *rdev;
|
||||
int spares = 0;
|
||||
int removed = 0;
|
||||
|
||||
rdev_for_each(rdev, mddev)
|
||||
if (rdev->raid_disk >= 0 &&
|
||||
if ((this == NULL || rdev == this) &&
|
||||
rdev->raid_disk >= 0 &&
|
||||
!test_bit(Blocked, &rdev->flags) &&
|
||||
(test_bit(Faulty, &rdev->flags) ||
|
||||
! test_bit(In_sync, &rdev->flags)) &&
|
||||
|
@ -7666,72 +7691,50 @@ static int remove_and_add_spares(struct mddev *mddev)
|
|||
if (removed && mddev->kobj.sd)
|
||||
sysfs_notify(&mddev->kobj, NULL, "degraded");
|
||||
|
||||
if (this)
|
||||
goto no_add;
|
||||
|
||||
rdev_for_each(rdev, mddev) {
|
||||
if (rdev->raid_disk >= 0 &&
|
||||
!test_bit(In_sync, &rdev->flags) &&
|
||||
!test_bit(Faulty, &rdev->flags))
|
||||
spares++;
|
||||
if (rdev->raid_disk < 0
|
||||
&& !test_bit(Faulty, &rdev->flags)) {
|
||||
rdev->recovery_offset = 0;
|
||||
if (mddev->pers->
|
||||
hot_add_disk(mddev, rdev) == 0) {
|
||||
if (sysfs_link_rdev(mddev, rdev))
|
||||
/* failure here is OK */;
|
||||
spares++;
|
||||
md_new_event(mddev);
|
||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||
}
|
||||
if (rdev->raid_disk >= 0)
|
||||
continue;
|
||||
if (test_bit(Faulty, &rdev->flags))
|
||||
continue;
|
||||
if (mddev->ro &&
|
||||
rdev->saved_raid_disk < 0)
|
||||
continue;
|
||||
|
||||
rdev->recovery_offset = 0;
|
||||
if (rdev->saved_raid_disk >= 0 && mddev->in_sync) {
|
||||
spin_lock_irq(&mddev->write_lock);
|
||||
if (mddev->in_sync)
|
||||
/* OK, this device, which is in_sync,
|
||||
* will definitely be noticed before
|
||||
* the next write, so recovery isn't
|
||||
* needed.
|
||||
*/
|
||||
rdev->recovery_offset = mddev->recovery_cp;
|
||||
spin_unlock_irq(&mddev->write_lock);
|
||||
}
|
||||
}
|
||||
if (removed)
|
||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||
return spares;
|
||||
}
|
||||
|
||||
static void reap_sync_thread(struct mddev *mddev)
|
||||
{
|
||||
struct md_rdev *rdev;
|
||||
|
||||
/* resync has finished, collect result */
|
||||
md_unregister_thread(&mddev->sync_thread);
|
||||
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
|
||||
!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
|
||||
/* success...*/
|
||||
/* activate any spares */
|
||||
if (mddev->pers->spare_active(mddev)) {
|
||||
sysfs_notify(&mddev->kobj, NULL,
|
||||
"degraded");
|
||||
if (mddev->ro && rdev->recovery_offset != MaxSector)
|
||||
/* not safe to add this disk now */
|
||||
continue;
|
||||
if (mddev->pers->
|
||||
hot_add_disk(mddev, rdev) == 0) {
|
||||
if (sysfs_link_rdev(mddev, rdev))
|
||||
/* failure here is OK */;
|
||||
spares++;
|
||||
md_new_event(mddev);
|
||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||
}
|
||||
}
|
||||
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
|
||||
mddev->pers->finish_reshape)
|
||||
mddev->pers->finish_reshape(mddev);
|
||||
|
||||
/* If array is no-longer degraded, then any saved_raid_disk
|
||||
* information must be scrapped. Also if any device is now
|
||||
* In_sync we must scrape the saved_raid_disk for that device
|
||||
* do the superblock for an incrementally recovered device
|
||||
* written out.
|
||||
*/
|
||||
rdev_for_each(rdev, mddev)
|
||||
if (!mddev->degraded ||
|
||||
test_bit(In_sync, &rdev->flags))
|
||||
rdev->saved_raid_disk = -1;
|
||||
|
||||
md_update_sb(mddev, 1);
|
||||
clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
|
||||
/* flag recovery needed just to double check */
|
||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
sysfs_notify_dirent_safe(mddev->sysfs_action);
|
||||
md_new_event(mddev);
|
||||
if (mddev->event_work.func)
|
||||
queue_work(md_misc_wq, &mddev->event_work);
|
||||
no_add:
|
||||
if (removed)
|
||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||
return spares;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -7789,22 +7792,16 @@ void md_check_recovery(struct mddev *mddev)
|
|||
int spares = 0;
|
||||
|
||||
if (mddev->ro) {
|
||||
/* Only thing we do on a ro array is remove
|
||||
* failed devices.
|
||||
/* On a read-only array we can:
|
||||
* - remove failed devices
|
||||
* - add already-in_sync devices if the array itself
|
||||
* is in-sync.
|
||||
* As we only add devices that are already in-sync,
|
||||
* we can activate the spares immediately.
|
||||
*/
|
||||
struct md_rdev *rdev;
|
||||
rdev_for_each(rdev, mddev)
|
||||
if (rdev->raid_disk >= 0 &&
|
||||
!test_bit(Blocked, &rdev->flags) &&
|
||||
test_bit(Faulty, &rdev->flags) &&
|
||||
atomic_read(&rdev->nr_pending)==0) {
|
||||
if (mddev->pers->hot_remove_disk(
|
||||
mddev, rdev) == 0) {
|
||||
sysfs_unlink_rdev(mddev, rdev);
|
||||
rdev->raid_disk = -1;
|
||||
}
|
||||
}
|
||||
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
remove_and_add_spares(mddev, NULL);
|
||||
mddev->pers->spare_active(mddev);
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
|
@ -7836,7 +7833,7 @@ void md_check_recovery(struct mddev *mddev)
|
|||
goto unlock;
|
||||
}
|
||||
if (mddev->sync_thread) {
|
||||
reap_sync_thread(mddev);
|
||||
md_reap_sync_thread(mddev);
|
||||
goto unlock;
|
||||
}
|
||||
/* Set RUNNING before clearing NEEDED to avoid
|
||||
|
@ -7867,7 +7864,7 @@ void md_check_recovery(struct mddev *mddev)
|
|||
goto unlock;
|
||||
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
|
||||
} else if ((spares = remove_and_add_spares(mddev))) {
|
||||
} else if ((spares = remove_and_add_spares(mddev, NULL))) {
|
||||
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
|
||||
|
@ -7917,6 +7914,51 @@ void md_check_recovery(struct mddev *mddev)
|
|||
}
|
||||
}
|
||||
|
||||
void md_reap_sync_thread(struct mddev *mddev)
|
||||
{
|
||||
struct md_rdev *rdev;
|
||||
|
||||
/* resync has finished, collect result */
|
||||
md_unregister_thread(&mddev->sync_thread);
|
||||
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
|
||||
!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
|
||||
/* success...*/
|
||||
/* activate any spares */
|
||||
if (mddev->pers->spare_active(mddev)) {
|
||||
sysfs_notify(&mddev->kobj, NULL,
|
||||
"degraded");
|
||||
set_bit(MD_CHANGE_DEVS, &mddev->flags);
|
||||
}
|
||||
}
|
||||
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
|
||||
mddev->pers->finish_reshape)
|
||||
mddev->pers->finish_reshape(mddev);
|
||||
|
||||
/* If array is no-longer degraded, then any saved_raid_disk
|
||||
* information must be scrapped. Also if any device is now
|
||||
* In_sync we must scrap the saved_raid_disk for that device
|
||||
* so the superblock for an incrementally recovered device
|
||||
* is written out.
|
||||
*/
|
||||
rdev_for_each(rdev, mddev)
|
||||
if (!mddev->degraded ||
|
||||
test_bit(In_sync, &rdev->flags))
|
||||
rdev->saved_raid_disk = -1;
|
||||
|
||||
md_update_sb(mddev, 1);
|
||||
clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
|
||||
/* flag recovery needed just to double check */
|
||||
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
sysfs_notify_dirent_safe(mddev->sysfs_action);
|
||||
md_new_event(mddev);
|
||||
if (mddev->event_work.func)
|
||||
queue_work(md_misc_wq, &mddev->event_work);
|
||||
}
|
||||
|
||||
void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
|
||||
{
|
||||
sysfs_notify_dirent_safe(rdev->sysfs_state);
|
||||
|
@ -8642,6 +8684,7 @@ EXPORT_SYMBOL(md_register_thread);
|
|||
EXPORT_SYMBOL(md_unregister_thread);
|
||||
EXPORT_SYMBOL(md_wakeup_thread);
|
||||
EXPORT_SYMBOL(md_check_recovery);
|
||||
EXPORT_SYMBOL(md_reap_sync_thread);
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_DESCRIPTION("MD RAID framework");
|
||||
MODULE_ALIAS("md");
|
||||
|
|
|
@ -567,6 +567,7 @@ extern struct md_thread *md_register_thread(
|
|||
extern void md_unregister_thread(struct md_thread **threadp);
|
||||
extern void md_wakeup_thread(struct md_thread *thread);
|
||||
extern void md_check_recovery(struct mddev *mddev);
|
||||
extern void md_reap_sync_thread(struct mddev *mddev);
|
||||
extern void md_write_start(struct mddev *mddev, struct bio *bi);
|
||||
extern void md_write_end(struct mddev *mddev);
|
||||
extern void md_done_sync(struct mddev *mddev, int blocks, int ok);
|
||||
|
|
|
@ -981,7 +981,12 @@ static void raid1_unplug(struct blk_plug_cb *cb, bool from_schedule)
|
|||
while (bio) { /* submit pending writes */
|
||||
struct bio *next = bio->bi_next;
|
||||
bio->bi_next = NULL;
|
||||
generic_make_request(bio);
|
||||
if (unlikely((bio->bi_rw & REQ_DISCARD) &&
|
||||
!blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
|
||||
/* Just ignore it */
|
||||
bio_endio(bio, 0);
|
||||
else
|
||||
generic_make_request(bio);
|
||||
bio = next;
|
||||
}
|
||||
kfree(plug);
|
||||
|
@ -2901,6 +2906,7 @@ static int stop(struct mddev *mddev)
|
|||
if (conf->r1bio_pool)
|
||||
mempool_destroy(conf->r1bio_pool);
|
||||
kfree(conf->mirrors);
|
||||
safe_put_page(conf->tmppage);
|
||||
kfree(conf->poolinfo);
|
||||
kfree(conf);
|
||||
mddev->private = NULL;
|
||||
|
|
|
@ -1133,7 +1133,12 @@ static void raid10_unplug(struct blk_plug_cb *cb, bool from_schedule)
|
|||
while (bio) { /* submit pending writes */
|
||||
struct bio *next = bio->bi_next;
|
||||
bio->bi_next = NULL;
|
||||
generic_make_request(bio);
|
||||
if (unlikely((bio->bi_rw & REQ_DISCARD) &&
|
||||
!blk_queue_discard(bdev_get_queue(bio->bi_bdev))))
|
||||
/* Just ignore it */
|
||||
bio_endio(bio, 0);
|
||||
else
|
||||
generic_make_request(bio);
|
||||
bio = next;
|
||||
}
|
||||
kfree(plug);
|
||||
|
@ -2913,6 +2918,22 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
|
|||
if (init_resync(conf))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* Allow skipping a full rebuild for incremental assembly
|
||||
* of a clean array, like RAID1 does.
|
||||
*/
|
||||
if (mddev->bitmap == NULL &&
|
||||
mddev->recovery_cp == MaxSector &&
|
||||
!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
|
||||
conf->fullsync == 0) {
|
||||
*skipped = 1;
|
||||
max_sector = mddev->dev_sectors;
|
||||
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
|
||||
test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
|
||||
max_sector = mddev->resync_max_sectors;
|
||||
return max_sector - sector_nr;
|
||||
}
|
||||
|
||||
skipped:
|
||||
max_sector = mddev->dev_sectors;
|
||||
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
|
||||
|
@ -3810,6 +3831,7 @@ static int stop(struct mddev *mddev)
|
|||
|
||||
if (conf->r10bio_pool)
|
||||
mempool_destroy(conf->r10bio_pool);
|
||||
safe_put_page(conf->tmppage);
|
||||
kfree(conf->mirrors);
|
||||
kfree(conf);
|
||||
mddev->private = NULL;
|
||||
|
|
|
@ -1887,8 +1887,15 @@ static void raid5_end_write_request(struct bio *bi, int error)
|
|||
&rdev->mddev->recovery);
|
||||
} else if (is_badblock(rdev, sh->sector,
|
||||
STRIPE_SECTORS,
|
||||
&first_bad, &bad_sectors))
|
||||
&first_bad, &bad_sectors)) {
|
||||
set_bit(R5_MadeGood, &sh->dev[i].flags);
|
||||
if (test_bit(R5_ReadError, &sh->dev[i].flags))
|
||||
/* That was a successful write so make
|
||||
* sure it looks like we already did
|
||||
* a re-write.
|
||||
*/
|
||||
set_bit(R5_ReWrite, &sh->dev[i].flags);
|
||||
}
|
||||
}
|
||||
rdev_dec_pending(rdev, conf->mddev);
|
||||
|
||||
|
@ -4672,9 +4679,10 @@ static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int
|
|||
*skipped = 1;
|
||||
return rv;
|
||||
}
|
||||
if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) &&
|
||||
!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
|
||||
!conf->fullsync && sync_blocks >= STRIPE_SECTORS) {
|
||||
if (!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
|
||||
!conf->fullsync &&
|
||||
!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) &&
|
||||
sync_blocks >= STRIPE_SECTORS) {
|
||||
/* we can skip this block, and probably more */
|
||||
sync_blocks /= STRIPE_SECTORS;
|
||||
*skipped = 1;
|
||||
|
|
Loading…
Reference in New Issue