block/diskstats: more accurate approximation of io_ticks for slow disks

Currently io_ticks is approximated by adding one at each start and end of
requests if jiffies counter has changed. This works perfectly for requests
shorter than a jiffy or if one of requests starts/ends at each jiffy.

If disk executes just one request at a time and they are longer than two
jiffies then only first and last jiffies will be accounted.

Fix is simple: at the end of request add up into io_ticks jiffies passed
since last update rather than just one jiffy.

Example: common HDD executes random read 4k requests around 12ms.

fio --name=test --filename=/dev/sdb --rw=randread --direct=1 --runtime=30 &
iostat -x 10 sdb

Note changes of iostat's "%util" 8,43% -> 99,99% before/after patch:

Before:

Device:         rrqm/s   wrqm/s     r/s     w/s    rkB/s    wkB/s avgrq-sz avgqu-sz   await r_await w_await  svctm  %util
sdb               0,00     0,00   82,60    0,00   330,40     0,00     8,00     0,96   12,09   12,09    0,00   1,02   8,43

After:

Device:         rrqm/s   wrqm/s     r/s     w/s    rkB/s    wkB/s avgrq-sz avgqu-sz   await r_await w_await  svctm  %util
sdb               0,00     0,00   82,50    0,00   330,00     0,00     8,00     1,00   12,10   12,10    0,00  12,12  99,99

Now io_ticks does not loose time between start and end of requests, but
for queue-depth > 1 some I/O time between adjacent starts might be lost.

For load estimation "%util" is not as useful as average queue length,
but it clearly shows how often disk queue is completely empty.

Fixes: 5b18b5a737 ("block: delete part_round_stats and switch to less precise counting")
Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
Konstantin Khlebnikov 2020-03-25 16:07:04 +03:00 committed by Jens Axboe
parent 387048bf67
commit 2b8bd42361
4 changed files with 11 additions and 8 deletions

View File

@ -100,7 +100,7 @@ Field 10 -- # of milliseconds spent doing I/Os (unsigned int)
Since 5.0 this field counts jiffies when at least one request was Since 5.0 this field counts jiffies when at least one request was
started or completed. If request runs more than 2 jiffies then some started or completed. If request runs more than 2 jiffies then some
I/O time will not be accounted unless there are other requests. I/O time might be not accounted in case of concurrent requests.
Field 11 -- weighted # of milliseconds spent doing I/Os (unsigned int) Field 11 -- weighted # of milliseconds spent doing I/Os (unsigned int)
This field is incremented at each I/O start, I/O completion, I/O This field is incremented at each I/O start, I/O completion, I/O
@ -143,6 +143,9 @@ are summed (possibly overflowing the unsigned long variable they are
summed to) and the result given to the user. There is no convenient summed to) and the result given to the user. There is no convenient
user interface for accessing the per-CPU counters themselves. user interface for accessing the per-CPU counters themselves.
Since 4.19 request times are measured with nanoseconds precision and
truncated to milliseconds before showing in this interface.
Disks vs Partitions Disks vs Partitions
------------------- -------------------

View File

@ -1768,14 +1768,14 @@ defer:
schedule_work(&bio_dirty_work); schedule_work(&bio_dirty_work);
} }
void update_io_ticks(struct hd_struct *part, unsigned long now) void update_io_ticks(struct hd_struct *part, unsigned long now, bool end)
{ {
unsigned long stamp; unsigned long stamp;
again: again:
stamp = READ_ONCE(part->stamp); stamp = READ_ONCE(part->stamp);
if (unlikely(stamp != now)) { if (unlikely(stamp != now)) {
if (likely(cmpxchg(&part->stamp, stamp, now) == stamp)) { if (likely(cmpxchg(&part->stamp, stamp, now) == stamp)) {
__part_stat_add(part, io_ticks, 1); __part_stat_add(part, io_ticks, end ? now - stamp : 1);
} }
} }
if (part->partno) { if (part->partno) {
@ -1791,7 +1791,7 @@ void generic_start_io_acct(struct request_queue *q, int op,
part_stat_lock(); part_stat_lock();
update_io_ticks(part, jiffies); update_io_ticks(part, jiffies, false);
part_stat_inc(part, ios[sgrp]); part_stat_inc(part, ios[sgrp]);
part_stat_add(part, sectors[sgrp], sectors); part_stat_add(part, sectors[sgrp], sectors);
part_inc_in_flight(q, part, op_is_write(op)); part_inc_in_flight(q, part, op_is_write(op));
@ -1809,7 +1809,7 @@ void generic_end_io_acct(struct request_queue *q, int req_op,
part_stat_lock(); part_stat_lock();
update_io_ticks(part, now); update_io_ticks(part, now, true);
part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration)); part_stat_add(part, nsecs[sgrp], jiffies_to_nsecs(duration));
part_stat_add(part, time_in_queue, duration); part_stat_add(part, time_in_queue, duration);
part_dec_in_flight(q, part, op_is_write(req_op)); part_dec_in_flight(q, part, op_is_write(req_op));

View File

@ -1337,7 +1337,7 @@ void blk_account_io_done(struct request *req, u64 now)
part_stat_lock(); part_stat_lock();
part = req->part; part = req->part;
update_io_ticks(part, jiffies); update_io_ticks(part, jiffies, true);
part_stat_inc(part, ios[sgrp]); part_stat_inc(part, ios[sgrp]);
part_stat_add(part, nsecs[sgrp], now - req->start_time_ns); part_stat_add(part, nsecs[sgrp], now - req->start_time_ns);
part_stat_add(part, time_in_queue, nsecs_to_jiffies64(now - req->start_time_ns)); part_stat_add(part, time_in_queue, nsecs_to_jiffies64(now - req->start_time_ns));
@ -1379,7 +1379,7 @@ void blk_account_io_start(struct request *rq, bool new_io)
rq->part = part; rq->part = part;
} }
update_io_ticks(part, jiffies); update_io_ticks(part, jiffies, false);
part_stat_unlock(); part_stat_unlock();
} }

View File

@ -422,7 +422,7 @@ void part_dec_in_flight(struct request_queue *q, struct hd_struct *part,
void part_inc_in_flight(struct request_queue *q, struct hd_struct *part, void part_inc_in_flight(struct request_queue *q, struct hd_struct *part,
int rw); int rw);
void update_io_ticks(struct hd_struct *part, unsigned long now); void update_io_ticks(struct hd_struct *part, unsigned long now, bool end);
/* block/genhd.c */ /* block/genhd.c */
extern void device_add_disk(struct device *parent, struct gendisk *disk, extern void device_add_disk(struct device *parent, struct gendisk *disk,