[PATCH] md: fix various bugs with aligned reads in RAID5
It is possible for raid5 to be sent a bio that is too big for an underlying device. So if it is a READ that we pass straight down to a device, it will fail and confuse RAID5. So in 'chunk_aligned_read' we check that the bio fits within the parameters for the target device and if it doesn't fit, fall back on reading through the stripe cache and making lots of one-page requests. Note that this is the earliest time we can check against the device because earlier we don't have a lock on the device, so it could change underneath us. Also, the code for handling a retry through the cache when a read fails has not been tested and was badly broken. This patch fixes that code. Signed-off-by: Neil Brown <neilb@suse.de> Cc: "Kai" <epimetreus@fastmail.fm> Cc: <stable@suse.de> Cc: <org@suse.de> Cc: Jens Axboe <jens.axboe@oracle.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
This commit is contained in:
parent
6649a38632
commit
387bb17374
|
@ -1264,7 +1264,7 @@ new_hw_segment:
|
||||||
bio->bi_hw_segments = nr_hw_segs;
|
bio->bi_hw_segments = nr_hw_segs;
|
||||||
bio->bi_flags |= (1 << BIO_SEG_VALID);
|
bio->bi_flags |= (1 << BIO_SEG_VALID);
|
||||||
}
|
}
|
||||||
|
EXPORT_SYMBOL(blk_recount_segments);
|
||||||
|
|
||||||
static int blk_phys_contig_segment(request_queue_t *q, struct bio *bio,
|
static int blk_phys_contig_segment(request_queue_t *q, struct bio *bio,
|
||||||
struct bio *nxt)
|
struct bio *nxt)
|
||||||
|
|
|
@ -2620,7 +2620,7 @@ static struct bio *remove_bio_from_retry(raid5_conf_t *conf)
|
||||||
}
|
}
|
||||||
bi = conf->retry_read_aligned_list;
|
bi = conf->retry_read_aligned_list;
|
||||||
if(bi) {
|
if(bi) {
|
||||||
conf->retry_read_aligned = bi->bi_next;
|
conf->retry_read_aligned_list = bi->bi_next;
|
||||||
bi->bi_next = NULL;
|
bi->bi_next = NULL;
|
||||||
bi->bi_phys_segments = 1; /* biased count of active stripes */
|
bi->bi_phys_segments = 1; /* biased count of active stripes */
|
||||||
bi->bi_hw_segments = 0; /* count of processed stripes */
|
bi->bi_hw_segments = 0; /* count of processed stripes */
|
||||||
|
@ -2669,6 +2669,27 @@ static int raid5_align_endio(struct bio *bi, unsigned int bytes, int error)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int bio_fits_rdev(struct bio *bi)
|
||||||
|
{
|
||||||
|
request_queue_t *q = bdev_get_queue(bi->bi_bdev);
|
||||||
|
|
||||||
|
if ((bi->bi_size>>9) > q->max_sectors)
|
||||||
|
return 0;
|
||||||
|
blk_recount_segments(q, bi);
|
||||||
|
if (bi->bi_phys_segments > q->max_phys_segments ||
|
||||||
|
bi->bi_hw_segments > q->max_hw_segments)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
if (q->merge_bvec_fn)
|
||||||
|
/* it's too hard to apply the merge_bvec_fn at this stage,
|
||||||
|
* just give up
|
||||||
|
*/
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static int chunk_aligned_read(request_queue_t *q, struct bio * raid_bio)
|
static int chunk_aligned_read(request_queue_t *q, struct bio * raid_bio)
|
||||||
{
|
{
|
||||||
mddev_t *mddev = q->queuedata;
|
mddev_t *mddev = q->queuedata;
|
||||||
|
@ -2715,6 +2736,13 @@ static int chunk_aligned_read(request_queue_t *q, struct bio * raid_bio)
|
||||||
align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
|
align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
|
||||||
align_bi->bi_sector += rdev->data_offset;
|
align_bi->bi_sector += rdev->data_offset;
|
||||||
|
|
||||||
|
if (!bio_fits_rdev(align_bi)) {
|
||||||
|
/* too big in some way */
|
||||||
|
bio_put(align_bi);
|
||||||
|
rdev_dec_pending(rdev, mddev);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
spin_lock_irq(&conf->device_lock);
|
spin_lock_irq(&conf->device_lock);
|
||||||
wait_event_lock_irq(conf->wait_for_stripe,
|
wait_event_lock_irq(conf->wait_for_stripe,
|
||||||
conf->quiesce == 0,
|
conf->quiesce == 0,
|
||||||
|
@ -3107,7 +3135,9 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
|
||||||
last_sector = raid_bio->bi_sector + (raid_bio->bi_size>>9);
|
last_sector = raid_bio->bi_sector + (raid_bio->bi_size>>9);
|
||||||
|
|
||||||
for (; logical_sector < last_sector;
|
for (; logical_sector < last_sector;
|
||||||
logical_sector += STRIPE_SECTORS, scnt++) {
|
logical_sector += STRIPE_SECTORS,
|
||||||
|
sector += STRIPE_SECTORS,
|
||||||
|
scnt++) {
|
||||||
|
|
||||||
if (scnt < raid_bio->bi_hw_segments)
|
if (scnt < raid_bio->bi_hw_segments)
|
||||||
/* already done this stripe */
|
/* already done this stripe */
|
||||||
|
@ -3123,7 +3153,13 @@ static int retry_aligned_read(raid5_conf_t *conf, struct bio *raid_bio)
|
||||||
}
|
}
|
||||||
|
|
||||||
set_bit(R5_ReadError, &sh->dev[dd_idx].flags);
|
set_bit(R5_ReadError, &sh->dev[dd_idx].flags);
|
||||||
add_stripe_bio(sh, raid_bio, dd_idx, 0);
|
if (!add_stripe_bio(sh, raid_bio, dd_idx, 0)) {
|
||||||
|
release_stripe(sh);
|
||||||
|
raid_bio->bi_hw_segments = scnt;
|
||||||
|
conf->retry_read_aligned = raid_bio;
|
||||||
|
return handled;
|
||||||
|
}
|
||||||
|
|
||||||
handle_stripe(sh, NULL);
|
handle_stripe(sh, NULL);
|
||||||
release_stripe(sh);
|
release_stripe(sh);
|
||||||
handled++;
|
handled++;
|
||||||
|
|
Loading…
Reference in New Issue