block: Track DISCARD statistics and output them in stat and diskstat
Add tracking of REQ_OP_DISCARD ios to the partition statistics and append them to the various stat files in /sys as well as /proc/diskstats. These are tracked with the same four stats as reads and writes: Number of discard ios completed. Number of discard ios merged Number of discard sectors completed Milliseconds spent on discard requests This is done via adding a new STAT_DISCARD define to genhd.h and then using it to index that stat field for discard requests. tj: Refreshed on top of v4.17 and other previous updates. Signed-off-by: Michael Callahan <michaelcallahan@fb.com> Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Andy Newell <newella@fb.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
This commit is contained in:
parent
ddcf35d397
commit
bdca3c87fb
|
@ -5,6 +5,7 @@ Description:
|
|||
The /proc/diskstats file displays the I/O statistics
|
||||
of block devices. Each line contains the following 14
|
||||
fields:
|
||||
|
||||
1 - major number
|
||||
2 - minor mumber
|
||||
3 - device name
|
||||
|
@ -19,4 +20,13 @@ Description:
|
|||
12 - I/Os currently in progress
|
||||
13 - time spent doing I/Os (ms)
|
||||
14 - weighted time spent doing I/Os (ms)
|
||||
|
||||
Kernel 4.18+ appends four more fields for discard
|
||||
tracking putting the total at 18:
|
||||
|
||||
15 - discards completed successfully
|
||||
16 - discards merged
|
||||
17 - sectors discarded
|
||||
18 - time spent discarding
|
||||
|
||||
For more details refer to Documentation/iostats.txt
|
||||
|
|
|
@ -31,28 +31,32 @@ write ticks milliseconds total wait time for write requests
|
|||
in_flight requests number of I/Os currently in flight
|
||||
io_ticks milliseconds total time this block device has been active
|
||||
time_in_queue milliseconds total wait time for all requests
|
||||
discard I/Os requests number of discard I/Os processed
|
||||
discard merges requests number of discard I/Os merged with in-queue I/O
|
||||
discard sectors sectors number of sectors discarded
|
||||
discard ticks milliseconds total wait time for discard requests
|
||||
|
||||
read I/Os, write I/Os
|
||||
=====================
|
||||
read I/Os, write I/Os, discard I/0s
|
||||
===================================
|
||||
|
||||
These values increment when an I/O request completes.
|
||||
|
||||
read merges, write merges
|
||||
=========================
|
||||
read merges, write merges, discard merges
|
||||
=========================================
|
||||
|
||||
These values increment when an I/O request is merged with an
|
||||
already-queued I/O request.
|
||||
|
||||
read sectors, write sectors
|
||||
===========================
|
||||
read sectors, write sectors, discard_sectors
|
||||
============================================
|
||||
|
||||
These values count the number of sectors read from or written to this
|
||||
block device. The "sectors" in question are the standard UNIX 512-byte
|
||||
sectors, not any device- or filesystem-specific block size. The
|
||||
counters are incremented when the I/O completes.
|
||||
These values count the number of sectors read from, written to, or
|
||||
discarded from this block device. The "sectors" in question are the
|
||||
standard UNIX 512-byte sectors, not any device- or filesystem-specific
|
||||
block size. The counters are incremented when the I/O completes.
|
||||
|
||||
read ticks, write ticks
|
||||
=======================
|
||||
read ticks, write ticks, discard ticks
|
||||
======================================
|
||||
|
||||
These values count the number of milliseconds that I/O requests have
|
||||
waited on this block device. If there are multiple I/O requests waiting,
|
||||
|
|
|
@ -31,6 +31,9 @@ Here are examples of these different formats::
|
|||
3 0 hda 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160
|
||||
3 1 hda1 35486 38030 38030 38030
|
||||
|
||||
4.18+ diskstats:
|
||||
3 0 hda 446216 784926 9550688 4382310 424847 312726 5922052 19310380 0 3376340 23705160 0 0 0 0
|
||||
|
||||
On 2.4 you might execute ``grep 'hda ' /proc/partitions``. On 2.6+, you have
|
||||
a choice of ``cat /sys/block/hda/stat`` or ``grep 'hda ' /proc/diskstats``.
|
||||
|
||||
|
@ -101,6 +104,18 @@ Field 11 -- weighted # of milliseconds spent doing I/Os
|
|||
last update of this field. This can provide an easy measure of both
|
||||
I/O completion time and the backlog that may be accumulating.
|
||||
|
||||
Field 12 -- # of discards completed
|
||||
This is the total number of discards completed successfully.
|
||||
|
||||
Field 13 -- # of discards merged
|
||||
See the description of field 2
|
||||
|
||||
Field 14 -- # of sectors discarded
|
||||
This is the total number of sectors discarded successfully.
|
||||
|
||||
Field 15 -- # of milliseconds spent discarding
|
||||
This is the total number of milliseconds spent by all discards (as
|
||||
measured from __make_request() to end_that_request_last()).
|
||||
|
||||
To avoid introducing performance bottlenecks, no locks are held while
|
||||
modifying these counters. This implies that minor inaccuracies may be
|
||||
|
|
|
@ -1333,8 +1333,11 @@ static int diskstats_show(struct seq_file *seqf, void *v)
|
|||
part_round_stats(gp->queue, cpu, hd);
|
||||
part_stat_unlock();
|
||||
part_in_flight(gp->queue, hd, inflight);
|
||||
seq_printf(seqf, "%4d %7d %s %lu %lu %lu "
|
||||
"%u %lu %lu %lu %u %u %u %u\n",
|
||||
seq_printf(seqf, "%4d %7d %s "
|
||||
"%lu %lu %lu %u "
|
||||
"%lu %lu %lu %u "
|
||||
"%u %u %u "
|
||||
"%lu %lu %lu %u\n",
|
||||
MAJOR(part_devt(hd)), MINOR(part_devt(hd)),
|
||||
disk_name(gp, hd->partno, buf),
|
||||
part_stat_read(hd, ios[STAT_READ]),
|
||||
|
@ -1347,7 +1350,11 @@ static int diskstats_show(struct seq_file *seqf, void *v)
|
|||
jiffies_to_msecs(part_stat_read(hd, ticks[STAT_WRITE])),
|
||||
inflight[0],
|
||||
jiffies_to_msecs(part_stat_read(hd, io_ticks)),
|
||||
jiffies_to_msecs(part_stat_read(hd, time_in_queue))
|
||||
jiffies_to_msecs(part_stat_read(hd, time_in_queue)),
|
||||
part_stat_read(hd, ios[STAT_DISCARD]),
|
||||
part_stat_read(hd, merges[STAT_DISCARD]),
|
||||
part_stat_read(hd, sectors[STAT_DISCARD]),
|
||||
jiffies_to_msecs(part_stat_read(hd, ticks[STAT_DISCARD]))
|
||||
);
|
||||
}
|
||||
disk_part_iter_exit(&piter);
|
||||
|
|
|
@ -130,7 +130,8 @@ ssize_t part_stat_show(struct device *dev,
|
|||
return sprintf(buf,
|
||||
"%8lu %8lu %8llu %8u "
|
||||
"%8lu %8lu %8llu %8u "
|
||||
"%8u %8u %8u"
|
||||
"%8u %8u %8u "
|
||||
"%8lu %8lu %8llu %8u"
|
||||
"\n",
|
||||
part_stat_read(p, ios[STAT_READ]),
|
||||
part_stat_read(p, merges[STAT_READ]),
|
||||
|
@ -142,7 +143,11 @@ ssize_t part_stat_show(struct device *dev,
|
|||
jiffies_to_msecs(part_stat_read(p, ticks[STAT_WRITE])),
|
||||
inflight[0],
|
||||
jiffies_to_msecs(part_stat_read(p, io_ticks)),
|
||||
jiffies_to_msecs(part_stat_read(p, time_in_queue)));
|
||||
jiffies_to_msecs(part_stat_read(p, time_in_queue)),
|
||||
part_stat_read(p, ios[STAT_DISCARD]),
|
||||
part_stat_read(p, merges[STAT_DISCARD]),
|
||||
(unsigned long long)part_stat_read(p, sectors[STAT_DISCARD]),
|
||||
jiffies_to_msecs(part_stat_read(p, ticks[STAT_DISCARD])));
|
||||
}
|
||||
|
||||
ssize_t part_inflight_show(struct device *dev, struct device_attribute *attr,
|
||||
|
|
|
@ -360,6 +360,7 @@ enum req_flag_bits {
|
|||
enum stat_group {
|
||||
STAT_READ,
|
||||
STAT_WRITE,
|
||||
STAT_DISCARD,
|
||||
|
||||
NR_STAT_GROUPS
|
||||
};
|
||||
|
@ -401,8 +402,15 @@ static inline bool op_is_sync(unsigned int op)
|
|||
(op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH));
|
||||
}
|
||||
|
||||
static inline bool op_is_discard(unsigned int op)
|
||||
{
|
||||
return (op & REQ_OP_MASK) == REQ_OP_DISCARD;
|
||||
}
|
||||
|
||||
static inline int op_stat_group(unsigned int op)
|
||||
{
|
||||
if (op_is_discard(op))
|
||||
return STAT_DISCARD;
|
||||
return op_is_write(op);
|
||||
}
|
||||
|
||||
|
|
|
@ -356,7 +356,8 @@ static inline void free_part_stats(struct hd_struct *part)
|
|||
|
||||
#define part_stat_read_accum(part, field) \
|
||||
(part_stat_read(part, field[STAT_READ]) + \
|
||||
part_stat_read(part, field[STAT_WRITE]))
|
||||
part_stat_read(part, field[STAT_WRITE]) + \
|
||||
part_stat_read(part, field[STAT_DISCARD]))
|
||||
|
||||
#define part_stat_add(cpu, part, field, addnd) do { \
|
||||
__part_stat_add((cpu), (part), field, addnd); \
|
||||
|
|
Loading…
Reference in New Issue