block: fix rq-qos breakage from skipping rq_qos_done_bio()
a647a524a4
("block: don't call rq_qos_ops->done_bio if the bio isn't tracked") made bio_endio() skip rq_qos_done_bio() if BIO_TRACKED is not set. While this fixed a potential oops, it also broke blk-iocost by skipping the done_bio callback for merged bios. Before, whether a bio goes through rq_qos_throttle() or rq_qos_merge(), rq_qos_done_bio() would be called on the bio on completion with BIO_TRACKED distinguishing the former from the latter. rq_qos_done_bio() is not called for bios which wenth through rq_qos_merge(). This royally confuses blk-iocost as the merged bios never finish and are considered perpetually in-flight. One reliably reproducible failure mode is an intermediate cgroup geting stuck active preventing its children from being activated due to the leaf-only rule, leading to loss of control. The following is from resctl-bench protection scenario which emulates isolating a web server like workload from a memory bomb run on an iocost configuration which should yield a reasonable level of protection. # cat /sys/block/nvme2n1/device/model Samsung SSD 970 PRO 512GB # cat /sys/fs/cgroup/io.cost.model 259:0 ctrl=user model=linear rbps=834913556 rseqiops=93622 rrandiops=102913 wbps=618985353 wseqiops=72325 wrandiops=71025 # cat /sys/fs/cgroup/io.cost.qos 259:0 enable=1 ctrl=user rpct=95.00 rlat=18776 wpct=95.00 wlat=8897 min=60.00 max=100.00 # resctl-bench -m 29.6G -r out.json run protection::scenario=mem-hog,loops=1 ... Memory Hog Summary ================== IO Latency: R p50=242u:336u/2.5m p90=794u:1.4m/7.5m p99=2.7m:8.0m/62.5m max=8.0m:36.4m/350m W p50=221u:323u/1.5m p90=709u:1.2m/5.5m p99=1.5m:2.5m/9.5m max=6.9m:35.9m/350m Isolation and Request Latency Impact Distributions: min p01 p05 p10 p25 p50 p75 p90 p95 p99 max mean stdev isol% 15.90 15.90 15.90 40.05 57.24 59.07 60.01 74.63 74.63 90.35 90.35 58.12 15.82 lat-imp% 0 0 0 0 0 4.55 14.68 15.54 233.5 548.1 548.1 53.88 143.6 Result: isol=58.12:15.82% lat_imp=53.88%:143.6 work_csv=100.0% missing=3.96% The isolation result of 58.12% is close to what this device would show without any IO control. Fix it by introducing a new flag BIO_QOS_MERGED to mark merged bios and calling rq_qos_done_bio() on them too. For consistency and clarity, rename BIO_TRACKED to BIO_QOS_THROTTLED. The flag checks are moved into rq_qos_done_bio() so that it's next to the code paths that set the flags. With the patch applied, the above same benchmark shows: # resctl-bench -m 29.6G -r out.json run protection::scenario=mem-hog,loops=1 ... Memory Hog Summary ================== IO Latency: R p50=123u:84.4u/985u p90=322u:256u/2.5m p99=1.6m:1.4m/9.5m max=11.1m:36.0m/350m W p50=429u:274u/995u p90=1.7m:1.3m/4.5m p99=3.4m:2.7m/11.5m max=7.9m:5.9m/26.5m Isolation and Request Latency Impact Distributions: min p01 p05 p10 p25 p50 p75 p90 p95 p99 max mean stdev isol% 84.91 84.91 89.51 90.73 92.31 94.49 96.36 98.04 98.71 100.0 100.0 94.42 2.81 lat-imp% 0 0 0 0 0 2.81 5.73 11.11 13.92 17.53 22.61 4.10 4.68 Result: isol=94.42:2.81% lat_imp=4.10%:4.68 work_csv=58.34% missing=0% Signed-off-by: Tejun Heo <tj@kernel.org> Fixes:a647a524a4
("block: don't call rq_qos_ops->done_bio if the bio isn't tracked") Cc: stable@vger.kernel.org # v5.15+ Cc: Ming Lei <ming.lei@redhat.com> Cc: Yu Kuai <yukuai3@huawei.com> Reviewed-by: Ming Lei <ming.lei@redhat.com> Link: https://lore.kernel.org/r/Yi7rdrzQEHjJLGKB@slm.duckdns.org Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
26fed4ac4e
commit
aa1b46dcdc
|
@ -1516,8 +1516,7 @@ again:
|
|||
if (!bio_integrity_endio(bio))
|
||||
return;
|
||||
|
||||
if (bio->bi_bdev && bio_flagged(bio, BIO_TRACKED))
|
||||
rq_qos_done_bio(bdev_get_queue(bio->bi_bdev), bio);
|
||||
rq_qos_done_bio(bio);
|
||||
|
||||
if (bio->bi_bdev && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
|
||||
trace_block_bio_complete(bdev_get_queue(bio->bi_bdev), bio);
|
||||
|
|
|
@ -598,7 +598,7 @@ static void blkcg_iolatency_done_bio(struct rq_qos *rqos, struct bio *bio)
|
|||
int inflight = 0;
|
||||
|
||||
blkg = bio->bi_blkg;
|
||||
if (!blkg || !bio_flagged(bio, BIO_TRACKED))
|
||||
if (!blkg || !bio_flagged(bio, BIO_QOS_THROTTLED))
|
||||
return;
|
||||
|
||||
iolat = blkg_to_lat(bio->bi_blkg);
|
||||
|
|
|
@ -177,20 +177,20 @@ static inline void rq_qos_requeue(struct request_queue *q, struct request *rq)
|
|||
__rq_qos_requeue(q->rq_qos, rq);
|
||||
}
|
||||
|
||||
static inline void rq_qos_done_bio(struct request_queue *q, struct bio *bio)
|
||||
static inline void rq_qos_done_bio(struct bio *bio)
|
||||
{
|
||||
if (q->rq_qos)
|
||||
__rq_qos_done_bio(q->rq_qos, bio);
|
||||
if (bio->bi_bdev && (bio_flagged(bio, BIO_QOS_THROTTLED) ||
|
||||
bio_flagged(bio, BIO_QOS_MERGED))) {
|
||||
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
|
||||
if (q->rq_qos)
|
||||
__rq_qos_done_bio(q->rq_qos, bio);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void rq_qos_throttle(struct request_queue *q, struct bio *bio)
|
||||
{
|
||||
/*
|
||||
* BIO_TRACKED lets controllers know that a bio went through the
|
||||
* normal rq_qos path.
|
||||
*/
|
||||
if (q->rq_qos) {
|
||||
bio_set_flag(bio, BIO_TRACKED);
|
||||
bio_set_flag(bio, BIO_QOS_THROTTLED);
|
||||
__rq_qos_throttle(q->rq_qos, bio);
|
||||
}
|
||||
}
|
||||
|
@ -205,8 +205,10 @@ static inline void rq_qos_track(struct request_queue *q, struct request *rq,
|
|||
static inline void rq_qos_merge(struct request_queue *q, struct request *rq,
|
||||
struct bio *bio)
|
||||
{
|
||||
if (q->rq_qos)
|
||||
if (q->rq_qos) {
|
||||
bio_set_flag(bio, BIO_QOS_MERGED);
|
||||
__rq_qos_merge(q->rq_qos, rq, bio);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void rq_qos_queue_depth_changed(struct request_queue *q)
|
||||
|
|
|
@ -324,7 +324,8 @@ enum {
|
|||
BIO_TRACE_COMPLETION, /* bio_endio() should trace the final completion
|
||||
* of this bio. */
|
||||
BIO_CGROUP_ACCT, /* has been accounted to a cgroup */
|
||||
BIO_TRACKED, /* set if bio goes through the rq_qos path */
|
||||
BIO_QOS_THROTTLED, /* bio went through rq_qos throttle path */
|
||||
BIO_QOS_MERGED, /* but went through rq_qos merge path */
|
||||
BIO_REMAPPED,
|
||||
BIO_ZONE_WRITE_LOCKED, /* Owns a zoned device zone write lock */
|
||||
BIO_PERCPU_CACHE, /* can participate in per-cpu alloc cache */
|
||||
|
|
Loading…
Reference in New Issue