md/raid10: avoid writing to known bad blocks on known bad drives.
Writing to known bad blocks on drives that have seen a write error is asking for trouble, so try to avoid those blocks.

Signed-off-by: NeilBrown <neilb@suse.de>
parent e875ecea26
commit d4432c23be
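Before the make_request() hunks below, here is a minimal user-space sketch (hypothetical names, not kernel code) of the core idea the patch implements: each pass of a write is clipped at the next boundary of a device's acknowledged bad range, and the remainder is issued as a further pass.

#include <stdio.h>

/* One acknowledged bad range on a device: sectors [start, start + len). */
struct bad_range { long start; long len; };

/* Clip a write of 'sectors' starting at 'sector' so it stops at the next
 * boundary of the bad range, mirroring how the patch clips max_sectors. */
static long clip_to_bad_range(long sector, long sectors, struct bad_range bb)
{
        long bad_end = bb.start + bb.len;

        if (sector >= bad_end || sector + sectors <= bb.start)
                return sectors;         /* no overlap: whole range is fine */
        if (sector >= bb.start) {
                /* Starting inside the bad range: this pass spans just the
                 * remaining bad sectors (the device is skipped for it). */
                long n = bad_end - sector;
                return n < sectors ? n : sectors;
        }
        return bb.start - sector;       /* stop just short of the bad range */
}

int main(void)
{
        struct bad_range bb = { .start = 100, .len = 8 };
        long sector = 96, remaining = 16;

        while (remaining > 0) {         /* one r10_bio-like pass per chunk */
                long chunk = clip_to_bad_range(sector, remaining, bb);
                printf("pass: sectors %ld..%ld\n", sector, sector + chunk - 1);
                sector += chunk;
                remaining -= chunk;
        }
        return 0;
}

Run against a 16-sector write at sector 96 with bad sectors 100..107, this prints three passes: 96..99, 100..107 (the write-around pass), and 108..111.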
@@ -807,6 +807,8 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 	unsigned long flags;
 	mdk_rdev_t *blocked_rdev;
 	int plugged;
+	int sectors_handled;
+	int max_sectors;
 
 	if (unlikely(bio->bi_rw & REQ_FLUSH)) {
 		md_flush_request(mddev, bio);
@@ -895,7 +897,6 @@ static int make_request(mddev_t *mddev, struct bio * bio)
 		/*
 		 * read balancing logic:
 		 */
-		int max_sectors;
 		int disk;
 		int slot;
 
@@ -925,8 +926,6 @@ read_again:
 			/* Could not read all from this device, so we will
 			 * need another r10_bio.
 			 */
-			int sectors_handled;
-
 			sectors_handled = (r10_bio->sectors + max_sectors
 					   - bio->bi_sector);
 			r10_bio->sectors = max_sectors;
@@ -963,13 +962,22 @@ read_again:
 	/* first select target devices under rcu_lock and
 	 * inc refcount on their rdev. Record them by setting
 	 * bios[x] to bio
+	 * If there are known/acknowledged bad blocks on any device
+	 * on which we have seen a write error, we want to avoid
+	 * writing to those blocks. This potentially requires several
+	 * writes to write around the bad blocks. Each set of writes
+	 * gets its own r10_bio with a set of bios attached. The number
+	 * of r10_bios is recorded in bio->bi_phys_segments just as with
+	 * the read case.
 	 */
 	plugged = mddev_check_plugged(mddev);
 
 	raid10_find_phys(conf, r10_bio);
 retry_write:
 	blocked_rdev = NULL;
 	rcu_read_lock();
+	max_sectors = r10_bio->sectors;
+
 	for (i = 0; i < conf->copies; i++) {
 		int d = r10_bio->devs[i].devnum;
 		mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[d].rdev);
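The comment added above borrows the read path's bookkeeping: bio->bi_phys_segments is repurposed to count how many r10_bios the original bio has been split into. A tiny sketch of that convention as I read the md code (not a verbatim extract): zero means the single-r10_bio fast path, and the first split jumps straight to two.

#include <assert.h>
#include <stdio.h>

/* bi_phys_segments == 0: bio covered by one r10_bio (nothing to count).
 * After a split it holds the number of r10_bios outstanding for this bio. */
static void count_extra_r10bio(unsigned int *bi_phys_segments)
{
        if (*bi_phys_segments == 0)
                *bi_phys_segments = 2; /* original r10_bio plus the new one */
        else
                (*bi_phys_segments)++; /* one more r10_bio for this bio */
}

int main(void)
{
        unsigned int segs = 0;

        count_extra_r10bio(&segs);     /* first split  -> 2 r10_bios */
        count_extra_r10bio(&segs);     /* second split -> 3 r10_bios */
        assert(segs == 3);
        printf("r10_bios outstanding: %u\n", segs);
        return 0;
}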
@@ -978,13 +986,55 @@ read_again:
 			blocked_rdev = rdev;
 			break;
 		}
-		if (rdev && !test_bit(Faulty, &rdev->flags)) {
-			atomic_inc(&rdev->nr_pending);
-			r10_bio->devs[i].bio = bio;
-		} else {
-			r10_bio->devs[i].bio = NULL;
+		r10_bio->devs[i].bio = NULL;
+		if (!rdev || test_bit(Faulty, &rdev->flags)) {
 			set_bit(R10BIO_Degraded, &r10_bio->state);
+			continue;
+		}
+		if (test_bit(WriteErrorSeen, &rdev->flags)) {
+			sector_t first_bad;
+			sector_t dev_sector = r10_bio->devs[i].addr;
+			int bad_sectors;
+			int is_bad;
+
+			is_bad = is_badblock(rdev, dev_sector,
+					     max_sectors,
+					     &first_bad, &bad_sectors);
+			if (is_bad < 0) {
+				/* Mustn't write here until the bad block
+				 * is acknowledged
+				 */
+				atomic_inc(&rdev->nr_pending);
+				set_bit(BlockedBadBlocks, &rdev->flags);
+				blocked_rdev = rdev;
+				break;
+			}
+			if (is_bad && first_bad <= dev_sector) {
+				/* Cannot write here at all */
+				bad_sectors -= (dev_sector - first_bad);
+				if (bad_sectors < max_sectors)
+					/* Mustn't write more than bad_sectors
+					 * to other devices yet
+					 */
+					max_sectors = bad_sectors;
+				/* We don't set R10BIO_Degraded as that
+				 * only applies if the disk is missing,
+				 * so it might be re-added, and we want to
+				 * know to recover this chunk.
+				 * In this case the device is here, and the
+				 * fact that this chunk is not in-sync is
+				 * recorded in the bad block log.
+				 */
+				continue;
+			}
+			if (is_bad) {
+				int good_sectors = first_bad - dev_sector;
+				if (good_sectors < max_sectors)
+					max_sectors = good_sectors;
+			}
+		}
+		r10_bio->devs[i].bio = bio;
+		atomic_inc(&rdev->nr_pending);
 	}
 	rcu_read_unlock();
 
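The decision tree in this hunk keys off is_badblock()'s three-way result. My reading of its contract (from the md bad-block code, so treat it as a reading rather than a spec): it returns -1 when the range overlaps an unacknowledged bad block, 1 when only acknowledged ones overlap (reporting the first via first_bad/bad_sectors), and 0 when the range is clean. A user-space restatement of the per-device handling:

#include <stdio.h>

typedef long long sector_t;

enum write_action { WRITE_OK, CLIP_TO_GOOD, SKIP_DEVICE, MUST_BLOCK };

/* Restatement of the hunk's decision tree; the is_badblock() result and
 * its first_bad/bad_sectors outputs are passed in directly. */
static enum write_action classify(int is_bad, sector_t dev_sector,
                                  sector_t first_bad, int bad_sectors,
                                  int *max_sectors)
{
        if (is_bad < 0)
                return MUST_BLOCK;      /* unacknowledged: wait, retry later */
        if (is_bad && first_bad <= dev_sector) {
                /* Write starts inside a bad range: skip this device, but
                 * cap the pass so every device stops at the same boundary. */
                bad_sectors -= (int)(dev_sector - first_bad);
                if (bad_sectors < *max_sectors)
                        *max_sectors = bad_sectors;
                return SKIP_DEVICE;
        }
        if (is_bad) {
                /* Bad range lies ahead: clip the pass short of it. */
                int good_sectors = (int)(first_bad - dev_sector);
                if (good_sectors < *max_sectors)
                        *max_sectors = good_sectors;
                return CLIP_TO_GOOD;
        }
        return WRITE_OK;                /* no bad blocks in this window */
}

int main(void)
{
        int max_sectors = 16;
        enum write_action a = classify(1, 96, 100, 8, &max_sectors);

        printf("action=%d max_sectors=%d\n", (int)a, max_sectors); /* 1, 4 */
        return 0;
}

Note the SKIP_DEVICE case still shrinks max_sectors: the pass must end where the bad range ends so the other copies advance in lockstep with it.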
@@ -1004,8 +1054,22 @@ read_again:
 		goto retry_write;
 	}
 
+	if (max_sectors < r10_bio->sectors) {
+		/* We are splitting this into multiple parts, so
+		 * we need to prepare for allocating another r10_bio.
+		 */
+		r10_bio->sectors = max_sectors;
+		spin_lock_irq(&conf->device_lock);
+		if (bio->bi_phys_segments == 0)
+			bio->bi_phys_segments = 2;
+		else
+			bio->bi_phys_segments++;
+		spin_unlock_irq(&conf->device_lock);
+	}
+	sectors_handled = r10_bio->sector + max_sectors - bio->bi_sector;
+
 	atomic_set(&r10_bio->remaining, 1);
-	bitmap_startwrite(mddev->bitmap, bio->bi_sector, r10_bio->sectors, 0);
+	bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0);
 
 	for (i = 0; i < conf->copies; i++) {
 		struct bio *mbio;
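sectors_handled measures progress through the original bio, not through this r10_bio: on a later pass r10_bio->sector is already past bio->bi_sector, so the sum above gives the total parent-bio sectors finished once this pass completes. That is presumably also why bitmap_startwrite() now marks r10_bio->sector rather than bio->bi_sector: with splitting, each pass must account only for its own range. A worked example with illustrative numbers:

#include <stdio.h>

int main(void)
{
        /* Illustrative numbers only: the parent bio starts at sector 1000
         * and is 16 sectors long; this (second) pass starts 4 sectors in
         * and was clipped to 8 sectors by a bad block. */
        long bi_sector = 1000;          /* bio->bi_sector */
        long r10_sector = 1004;         /* r10_bio->sector on this pass */
        long max_sectors = 8;           /* clipped size of this pass */

        long sectors_handled = r10_sector + max_sectors - bi_sector;
        printf("sectors_handled = %ld of 16\n", sectors_handled);  /* 12 */
        return 0;
}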
@@ -1014,10 +1078,12 @@ read_again:
 			continue;
 
 		mbio = bio_clone_mddev(bio, GFP_NOIO, mddev);
+		md_trim_bio(mbio, r10_bio->sector - bio->bi_sector,
+			    max_sectors);
 		r10_bio->devs[i].bio = mbio;
 
-		mbio->bi_sector	= r10_bio->devs[i].addr+
-			conf->mirrors[d].rdev->data_offset;
+		mbio->bi_sector	= (r10_bio->devs[i].addr+
+				   conf->mirrors[d].rdev->data_offset);
 		mbio->bi_bdev = conf->mirrors[d].rdev->bdev;
 		mbio->bi_end_io	= raid10_end_write_request;
 		mbio->bi_rw = WRITE | do_sync | do_fua;
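md_trim_bio() narrows each cloned bio to just this pass's window of the parent bio. As a rough model of its effect (not the helper's real implementation, which has to adjust the bio's vector as well):

#include <stdio.h>

/* Toy stand-in for a bio: a start sector and a length in sectors. */
struct toy_bio { long sector; long sectors; };

/* Rough model of md_trim_bio(bio, offset, size): keep only the window
 * that starts 'offset' sectors in and runs for 'size' sectors. */
static void trim_bio(struct toy_bio *b, long offset, long size)
{
        b->sector += offset;
        b->sectors = size;
}

int main(void)
{
        /* Clone of a 16-sector bio at 1000; the second pass covers
         * 1004..1011, so offset = r10_bio->sector - bio->bi_sector = 4
         * and size = max_sectors = 8, as in the hunk above. */
        struct toy_bio mbio = { .sector = 1000, .sectors = 16 };

        trim_bio(&mbio, 4, 8);
        printf("mbio: sector=%ld sectors=%ld\n", mbio.sector, mbio.sectors);
        return 0;
}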
@@ -1042,6 +1108,21 @@ read_again:
 	/* In case raid10d snuck in to freeze_array */
 	wake_up(&conf->wait_barrier);
 
+	if (sectors_handled < (bio->bi_size >> 9)) {
+		/* We need another r10_bio. It has already been counted
+		 * in bio->bi_phys_segments.
+		 */
+		r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO);
+
+		r10_bio->master_bio = bio;
+		r10_bio->sectors = (bio->bi_size >> 9) - sectors_handled;
+
+		r10_bio->mddev = mddev;
+		r10_bio->sector = bio->bi_sector + sectors_handled;
+		r10_bio->state = 0;
+		goto retry_write;
+	}
+
 	if (do_sync || !mddev->bitmap || !plugged)
 		md_wakeup_thread(mddev->thread);
 	return 0;
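The tail check works in 512-byte sectors: bi_size is in bytes, hence the >> 9. If earlier passes have not covered the whole bio, the next r10_bio picks up exactly where sectors_handled left off, and the goto retry_write re-runs the bad-block clipping for the remaining range:

#include <stdio.h>

int main(void)
{
        /* Illustrative numbers: bi_size is in bytes and sectors are
         * 512 bytes, hence the >> 9 used in the hunk above. */
        unsigned long bi_size = 16 * 512;       /* a 16-sector write */
        long bi_sector = 1000;                  /* bio->bi_sector */
        long sectors_handled = 12;              /* covered by earlier passes */

        if (sectors_handled < (long)(bi_size >> 9)) {
                long next_sector  = bi_sector + sectors_handled;
                long next_sectors = (bi_size >> 9) - sectors_handled;
                printf("next r10_bio: sector=%ld sectors=%ld\n",
                       next_sector, next_sectors);      /* 1012, 4 */
        }
        return 0;
}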