md/raid1: split out two sub-functions from sync_request_write
sync_request_write is too big and too deep. So split out two self-contains bits of functionality into separate function. Signed-off-by: NeilBrown <neilb@suse.de>
This commit is contained in:
parent
6f8d0c77ce
commit
a68e587035
|
@ -1181,6 +1181,195 @@ static void end_sync_write(struct bio *bio, int error)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int fix_sync_read_error(r1bio_t *r1_bio)
|
||||||
|
{
|
||||||
|
/* Try some synchronous reads of other devices to get
|
||||||
|
* good data, much like with normal read errors. Only
|
||||||
|
* read into the pages we already have so we don't
|
||||||
|
* need to re-issue the read request.
|
||||||
|
* We don't need to freeze the array, because being in an
|
||||||
|
* active sync request, there is no normal IO, and
|
||||||
|
* no overlapping syncs.
|
||||||
|
*/
|
||||||
|
mddev_t *mddev = r1_bio->mddev;
|
||||||
|
conf_t *conf = mddev->private;
|
||||||
|
struct bio *bio = r1_bio->bios[r1_bio->read_disk];
|
||||||
|
sector_t sect = r1_bio->sector;
|
||||||
|
int sectors = r1_bio->sectors;
|
||||||
|
int idx = 0;
|
||||||
|
|
||||||
|
while(sectors) {
|
||||||
|
int s = sectors;
|
||||||
|
int d = r1_bio->read_disk;
|
||||||
|
int success = 0;
|
||||||
|
mdk_rdev_t *rdev;
|
||||||
|
|
||||||
|
if (s > (PAGE_SIZE>>9))
|
||||||
|
s = PAGE_SIZE >> 9;
|
||||||
|
do {
|
||||||
|
if (r1_bio->bios[d]->bi_end_io == end_sync_read) {
|
||||||
|
/* No rcu protection needed here devices
|
||||||
|
* can only be removed when no resync is
|
||||||
|
* active, and resync is currently active
|
||||||
|
*/
|
||||||
|
rdev = conf->mirrors[d].rdev;
|
||||||
|
if (sync_page_io(rdev,
|
||||||
|
sect,
|
||||||
|
s<<9,
|
||||||
|
bio->bi_io_vec[idx].bv_page,
|
||||||
|
READ, false)) {
|
||||||
|
success = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
d++;
|
||||||
|
if (d == conf->raid_disks)
|
||||||
|
d = 0;
|
||||||
|
} while (!success && d != r1_bio->read_disk);
|
||||||
|
|
||||||
|
if (success) {
|
||||||
|
int start = d;
|
||||||
|
/* write it back and re-read */
|
||||||
|
set_bit(R1BIO_Uptodate, &r1_bio->state);
|
||||||
|
while (d != r1_bio->read_disk) {
|
||||||
|
if (d == 0)
|
||||||
|
d = conf->raid_disks;
|
||||||
|
d--;
|
||||||
|
if (r1_bio->bios[d]->bi_end_io != end_sync_read)
|
||||||
|
continue;
|
||||||
|
rdev = conf->mirrors[d].rdev;
|
||||||
|
atomic_add(s, &rdev->corrected_errors);
|
||||||
|
if (sync_page_io(rdev,
|
||||||
|
sect,
|
||||||
|
s<<9,
|
||||||
|
bio->bi_io_vec[idx].bv_page,
|
||||||
|
WRITE, false) == 0)
|
||||||
|
md_error(mddev, rdev);
|
||||||
|
}
|
||||||
|
d = start;
|
||||||
|
while (d != r1_bio->read_disk) {
|
||||||
|
if (d == 0)
|
||||||
|
d = conf->raid_disks;
|
||||||
|
d--;
|
||||||
|
if (r1_bio->bios[d]->bi_end_io != end_sync_read)
|
||||||
|
continue;
|
||||||
|
rdev = conf->mirrors[d].rdev;
|
||||||
|
if (sync_page_io(rdev,
|
||||||
|
sect,
|
||||||
|
s<<9,
|
||||||
|
bio->bi_io_vec[idx].bv_page,
|
||||||
|
READ, false) == 0)
|
||||||
|
md_error(mddev, rdev);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
char b[BDEVNAME_SIZE];
|
||||||
|
/* Cannot read from anywhere, array is toast */
|
||||||
|
md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
|
||||||
|
printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error"
|
||||||
|
" for block %llu\n",
|
||||||
|
mdname(mddev),
|
||||||
|
bdevname(bio->bi_bdev, b),
|
||||||
|
(unsigned long long)r1_bio->sector);
|
||||||
|
md_done_sync(mddev, r1_bio->sectors, 0);
|
||||||
|
put_buf(r1_bio);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
sectors -= s;
|
||||||
|
sect += s;
|
||||||
|
idx ++;
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int process_checks(r1bio_t *r1_bio)
|
||||||
|
{
|
||||||
|
/* We have read all readable devices. If we haven't
|
||||||
|
* got the block, then there is no hope left.
|
||||||
|
* If we have, then we want to do a comparison
|
||||||
|
* and skip the write if everything is the same.
|
||||||
|
* If any blocks failed to read, then we need to
|
||||||
|
* attempt an over-write
|
||||||
|
*/
|
||||||
|
mddev_t *mddev = r1_bio->mddev;
|
||||||
|
conf_t *conf = mddev->private;
|
||||||
|
int primary;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
|
||||||
|
for (i=0; i<mddev->raid_disks; i++)
|
||||||
|
if (r1_bio->bios[i]->bi_end_io == end_sync_read)
|
||||||
|
md_error(mddev, conf->mirrors[i].rdev);
|
||||||
|
|
||||||
|
md_done_sync(mddev, r1_bio->sectors, 1);
|
||||||
|
put_buf(r1_bio);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
for (primary=0; primary<mddev->raid_disks; primary++)
|
||||||
|
if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
|
||||||
|
test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
|
||||||
|
r1_bio->bios[primary]->bi_end_io = NULL;
|
||||||
|
rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
r1_bio->read_disk = primary;
|
||||||
|
for (i=0; i<mddev->raid_disks; i++)
|
||||||
|
if (r1_bio->bios[i]->bi_end_io == end_sync_read) {
|
||||||
|
int j;
|
||||||
|
int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9);
|
||||||
|
struct bio *pbio = r1_bio->bios[primary];
|
||||||
|
struct bio *sbio = r1_bio->bios[i];
|
||||||
|
|
||||||
|
if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) {
|
||||||
|
for (j = vcnt; j-- ; ) {
|
||||||
|
struct page *p, *s;
|
||||||
|
p = pbio->bi_io_vec[j].bv_page;
|
||||||
|
s = sbio->bi_io_vec[j].bv_page;
|
||||||
|
if (memcmp(page_address(p),
|
||||||
|
page_address(s),
|
||||||
|
PAGE_SIZE))
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} else
|
||||||
|
j = 0;
|
||||||
|
if (j >= 0)
|
||||||
|
mddev->resync_mismatches += r1_bio->sectors;
|
||||||
|
if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
|
||||||
|
&& test_bit(BIO_UPTODATE, &sbio->bi_flags))) {
|
||||||
|
sbio->bi_end_io = NULL;
|
||||||
|
rdev_dec_pending(conf->mirrors[i].rdev, mddev);
|
||||||
|
} else {
|
||||||
|
/* fixup the bio for reuse */
|
||||||
|
int size;
|
||||||
|
sbio->bi_vcnt = vcnt;
|
||||||
|
sbio->bi_size = r1_bio->sectors << 9;
|
||||||
|
sbio->bi_idx = 0;
|
||||||
|
sbio->bi_phys_segments = 0;
|
||||||
|
sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
|
||||||
|
sbio->bi_flags |= 1 << BIO_UPTODATE;
|
||||||
|
sbio->bi_next = NULL;
|
||||||
|
sbio->bi_sector = r1_bio->sector +
|
||||||
|
conf->mirrors[i].rdev->data_offset;
|
||||||
|
sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
|
||||||
|
size = sbio->bi_size;
|
||||||
|
for (j = 0; j < vcnt ; j++) {
|
||||||
|
struct bio_vec *bi;
|
||||||
|
bi = &sbio->bi_io_vec[j];
|
||||||
|
bi->bv_offset = 0;
|
||||||
|
if (size > PAGE_SIZE)
|
||||||
|
bi->bv_len = PAGE_SIZE;
|
||||||
|
else
|
||||||
|
bi->bv_len = size;
|
||||||
|
size -= PAGE_SIZE;
|
||||||
|
memcpy(page_address(bi->bv_page),
|
||||||
|
page_address(pbio->bi_io_vec[j].bv_page),
|
||||||
|
PAGE_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
|
static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
|
||||||
{
|
{
|
||||||
conf_t *conf = mddev->private;
|
conf_t *conf = mddev->private;
|
||||||
|
@ -1191,184 +1380,14 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
|
||||||
bio = r1_bio->bios[r1_bio->read_disk];
|
bio = r1_bio->bios[r1_bio->read_disk];
|
||||||
|
|
||||||
|
|
||||||
if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
|
if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
|
||||||
/* We have read all readable devices. If we haven't
|
if (process_checks(r1_bio) < 0)
|
||||||
* got the block, then there is no hope left.
|
|
||||||
* If we have, then we want to do a comparison
|
|
||||||
* and skip the write if everything is the same.
|
|
||||||
* If any blocks failed to read, then we need to
|
|
||||||
* attempt an over-write
|
|
||||||
*/
|
|
||||||
int primary;
|
|
||||||
if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
|
|
||||||
for (i=0; i<mddev->raid_disks; i++)
|
|
||||||
if (r1_bio->bios[i]->bi_end_io == end_sync_read)
|
|
||||||
md_error(mddev, conf->mirrors[i].rdev);
|
|
||||||
|
|
||||||
md_done_sync(mddev, r1_bio->sectors, 1);
|
|
||||||
put_buf(r1_bio);
|
|
||||||
return;
|
return;
|
||||||
}
|
|
||||||
for (primary=0; primary<mddev->raid_disks; primary++)
|
|
||||||
if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
|
|
||||||
test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
|
|
||||||
r1_bio->bios[primary]->bi_end_io = NULL;
|
|
||||||
rdev_dec_pending(conf->mirrors[primary].rdev, mddev);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
r1_bio->read_disk = primary;
|
|
||||||
for (i=0; i<mddev->raid_disks; i++)
|
|
||||||
if (r1_bio->bios[i]->bi_end_io == end_sync_read) {
|
|
||||||
int j;
|
|
||||||
int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9);
|
|
||||||
struct bio *pbio = r1_bio->bios[primary];
|
|
||||||
struct bio *sbio = r1_bio->bios[i];
|
|
||||||
|
|
||||||
if (test_bit(BIO_UPTODATE, &sbio->bi_flags)) {
|
|
||||||
for (j = vcnt; j-- ; ) {
|
|
||||||
struct page *p, *s;
|
|
||||||
p = pbio->bi_io_vec[j].bv_page;
|
|
||||||
s = sbio->bi_io_vec[j].bv_page;
|
|
||||||
if (memcmp(page_address(p),
|
|
||||||
page_address(s),
|
|
||||||
PAGE_SIZE))
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} else
|
|
||||||
j = 0;
|
|
||||||
if (j >= 0)
|
|
||||||
mddev->resync_mismatches += r1_bio->sectors;
|
|
||||||
if (j < 0 || (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)
|
|
||||||
&& test_bit(BIO_UPTODATE, &sbio->bi_flags))) {
|
|
||||||
sbio->bi_end_io = NULL;
|
|
||||||
rdev_dec_pending(conf->mirrors[i].rdev, mddev);
|
|
||||||
} else {
|
|
||||||
/* fixup the bio for reuse */
|
|
||||||
int size;
|
|
||||||
sbio->bi_vcnt = vcnt;
|
|
||||||
sbio->bi_size = r1_bio->sectors << 9;
|
|
||||||
sbio->bi_idx = 0;
|
|
||||||
sbio->bi_phys_segments = 0;
|
|
||||||
sbio->bi_flags &= ~(BIO_POOL_MASK - 1);
|
|
||||||
sbio->bi_flags |= 1 << BIO_UPTODATE;
|
|
||||||
sbio->bi_next = NULL;
|
|
||||||
sbio->bi_sector = r1_bio->sector +
|
|
||||||
conf->mirrors[i].rdev->data_offset;
|
|
||||||
sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
|
|
||||||
size = sbio->bi_size;
|
|
||||||
for (j = 0; j < vcnt ; j++) {
|
|
||||||
struct bio_vec *bi;
|
|
||||||
bi = &sbio->bi_io_vec[j];
|
|
||||||
bi->bv_offset = 0;
|
|
||||||
if (size > PAGE_SIZE)
|
|
||||||
bi->bv_len = PAGE_SIZE;
|
|
||||||
else
|
|
||||||
bi->bv_len = size;
|
|
||||||
size -= PAGE_SIZE;
|
|
||||||
memcpy(page_address(bi->bv_page),
|
|
||||||
page_address(pbio->bi_io_vec[j].bv_page),
|
|
||||||
PAGE_SIZE);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) {
|
|
||||||
/* ouch - failed to read all of that.
|
|
||||||
* Try some synchronous reads of other devices to get
|
|
||||||
* good data, much like with normal read errors. Only
|
|
||||||
* read into the pages we already have so we don't
|
|
||||||
* need to re-issue the read request.
|
|
||||||
* We don't need to freeze the array, because being in an
|
|
||||||
* active sync request, there is no normal IO, and
|
|
||||||
* no overlapping syncs.
|
|
||||||
*/
|
|
||||||
sector_t sect = r1_bio->sector;
|
|
||||||
int sectors = r1_bio->sectors;
|
|
||||||
int idx = 0;
|
|
||||||
|
|
||||||
while(sectors) {
|
|
||||||
int s = sectors;
|
|
||||||
int d = r1_bio->read_disk;
|
|
||||||
int success = 0;
|
|
||||||
mdk_rdev_t *rdev;
|
|
||||||
|
|
||||||
if (s > (PAGE_SIZE>>9))
|
|
||||||
s = PAGE_SIZE >> 9;
|
|
||||||
do {
|
|
||||||
if (r1_bio->bios[d]->bi_end_io == end_sync_read) {
|
|
||||||
/* No rcu protection needed here devices
|
|
||||||
* can only be removed when no resync is
|
|
||||||
* active, and resync is currently active
|
|
||||||
*/
|
|
||||||
rdev = conf->mirrors[d].rdev;
|
|
||||||
if (sync_page_io(rdev,
|
|
||||||
sect,
|
|
||||||
s<<9,
|
|
||||||
bio->bi_io_vec[idx].bv_page,
|
|
||||||
READ, false)) {
|
|
||||||
success = 1;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
d++;
|
|
||||||
if (d == conf->raid_disks)
|
|
||||||
d = 0;
|
|
||||||
} while (!success && d != r1_bio->read_disk);
|
|
||||||
|
|
||||||
if (success) {
|
|
||||||
int start = d;
|
|
||||||
/* write it back and re-read */
|
|
||||||
set_bit(R1BIO_Uptodate, &r1_bio->state);
|
|
||||||
while (d != r1_bio->read_disk) {
|
|
||||||
if (d == 0)
|
|
||||||
d = conf->raid_disks;
|
|
||||||
d--;
|
|
||||||
if (r1_bio->bios[d]->bi_end_io != end_sync_read)
|
|
||||||
continue;
|
|
||||||
rdev = conf->mirrors[d].rdev;
|
|
||||||
atomic_add(s, &rdev->corrected_errors);
|
|
||||||
if (sync_page_io(rdev,
|
|
||||||
sect,
|
|
||||||
s<<9,
|
|
||||||
bio->bi_io_vec[idx].bv_page,
|
|
||||||
WRITE, false) == 0)
|
|
||||||
md_error(mddev, rdev);
|
|
||||||
}
|
|
||||||
d = start;
|
|
||||||
while (d != r1_bio->read_disk) {
|
|
||||||
if (d == 0)
|
|
||||||
d = conf->raid_disks;
|
|
||||||
d--;
|
|
||||||
if (r1_bio->bios[d]->bi_end_io != end_sync_read)
|
|
||||||
continue;
|
|
||||||
rdev = conf->mirrors[d].rdev;
|
|
||||||
if (sync_page_io(rdev,
|
|
||||||
sect,
|
|
||||||
s<<9,
|
|
||||||
bio->bi_io_vec[idx].bv_page,
|
|
||||||
READ, false) == 0)
|
|
||||||
md_error(mddev, rdev);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
char b[BDEVNAME_SIZE];
|
|
||||||
/* Cannot read from anywhere, array is toast */
|
|
||||||
md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
|
|
||||||
printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error"
|
|
||||||
" for block %llu\n",
|
|
||||||
mdname(mddev),
|
|
||||||
bdevname(bio->bi_bdev, b),
|
|
||||||
(unsigned long long)r1_bio->sector);
|
|
||||||
md_done_sync(mddev, r1_bio->sectors, 0);
|
|
||||||
put_buf(r1_bio);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
sectors -= s;
|
|
||||||
sect += s;
|
|
||||||
idx ++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
|
||||||
|
/* ouch - failed to read all of that. */
|
||||||
|
if (!fix_sync_read_error(r1_bio))
|
||||||
|
return;
|
||||||
/*
|
/*
|
||||||
* schedule writes
|
* schedule writes
|
||||||
*/
|
*/
|
||||||
|
|
Loading…
Reference in New Issue