block-throttle: avoid double charge
If a bio is throttled and split after throttling, the bio could be resubmited and enters the throttling again. This will cause part of the bio to be charged multiple times. If the cgroup has an IO limit, the double charge will significantly harm the performance. The bio split becomes quite common after arbitrary bio size change. To fix this, we always set the BIO_THROTTLED flag if a bio is throttled. If the bio is cloned/split, we copy the flag to new bio too to avoid a double charge. However, cloned bio could be directed to a new disk, keeping the flag be a problem. The observation is we always set new disk for the bio in this case, so we can clear the flag in bio_set_dev(). This issue exists for a long time, arbitrary bio size change just makes it worse, so this should go into stable at least since v4.2. V1-> V2: Not add extra field in bio based on discussion with Tejun Cc: Vivek Goyal <vgoyal@redhat.com> Cc: stable@vger.kernel.org Acked-by: Tejun Heo <tj@kernel.org> Signed-off-by: Shaohua Li <shli@fb.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
0abc2a1038
commit
111be88398
|
@ -599,6 +599,8 @@ void __bio_clone_fast(struct bio *bio, struct bio *bio_src)
|
|||
bio->bi_disk = bio_src->bi_disk;
|
||||
bio->bi_partno = bio_src->bi_partno;
|
||||
bio_set_flag(bio, BIO_CLONED);
|
||||
if (bio_flagged(bio_src, BIO_THROTTLED))
|
||||
bio_set_flag(bio, BIO_THROTTLED);
|
||||
bio->bi_opf = bio_src->bi_opf;
|
||||
bio->bi_write_hint = bio_src->bi_write_hint;
|
||||
bio->bi_iter = bio_src->bi_iter;
|
||||
|
|
|
@ -2226,13 +2226,7 @@ again:
|
|||
out_unlock:
|
||||
spin_unlock_irq(q->queue_lock);
|
||||
out:
|
||||
/*
|
||||
* As multiple blk-throtls may stack in the same issue path, we
|
||||
* don't want bios to leave with the flag set. Clear the flag if
|
||||
* being issued.
|
||||
*/
|
||||
if (!throttled)
|
||||
bio_clear_flag(bio, BIO_THROTTLED);
|
||||
bio_set_flag(bio, BIO_THROTTLED);
|
||||
|
||||
#ifdef CONFIG_BLK_DEV_THROTTLING_LOW
|
||||
if (throttled || !td->track_bio_latency)
|
||||
|
|
|
@ -492,6 +492,8 @@ extern unsigned int bvec_nr_vecs(unsigned short idx);
|
|||
|
||||
#define bio_set_dev(bio, bdev) \
|
||||
do { \
|
||||
if ((bio)->bi_disk != (bdev)->bd_disk) \
|
||||
bio_clear_flag(bio, BIO_THROTTLED);\
|
||||
(bio)->bi_disk = (bdev)->bd_disk; \
|
||||
(bio)->bi_partno = (bdev)->bd_partno; \
|
||||
} while (0)
|
||||
|
|
|
@ -50,8 +50,6 @@ struct blk_issue_stat {
|
|||
struct bio {
|
||||
struct bio *bi_next; /* request queue link */
|
||||
struct gendisk *bi_disk;
|
||||
u8 bi_partno;
|
||||
blk_status_t bi_status;
|
||||
unsigned int bi_opf; /* bottom bits req flags,
|
||||
* top bits REQ_OP. Use
|
||||
* accessors.
|
||||
|
@ -59,8 +57,8 @@ struct bio {
|
|||
unsigned short bi_flags; /* status, etc and bvec pool number */
|
||||
unsigned short bi_ioprio;
|
||||
unsigned short bi_write_hint;
|
||||
|
||||
struct bvec_iter bi_iter;
|
||||
blk_status_t bi_status;
|
||||
u8 bi_partno;
|
||||
|
||||
/* Number of segments in this BIO after
|
||||
* physical address coalescing is performed.
|
||||
|
@ -74,8 +72,9 @@ struct bio {
|
|||
unsigned int bi_seg_front_size;
|
||||
unsigned int bi_seg_back_size;
|
||||
|
||||
atomic_t __bi_remaining;
|
||||
struct bvec_iter bi_iter;
|
||||
|
||||
atomic_t __bi_remaining;
|
||||
bio_end_io_t *bi_end_io;
|
||||
|
||||
void *bi_private;
|
||||
|
|
Loading…
Reference in New Issue