dm stats: support precise timestamps

Make it possible to use precise timestamps with nanosecond granularity
in dm statistics.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
This commit is contained in:
Mikulas Patocka 2015-06-09 17:21:39 -04:00 committed by Mike Snitzer
parent dd4c1b7d0c
commit c96aec344d
3 changed files with 127 additions and 43 deletions

View File

@ -13,9 +13,13 @@ the range specified.
The I/O statistics counters for each step-sized area of a region are The I/O statistics counters for each step-sized area of a region are
in the same format as /sys/block/*/stat or /proc/diskstats (see: in the same format as /sys/block/*/stat or /proc/diskstats (see:
Documentation/iostats.txt). But two extra counters (12 and 13) are Documentation/iostats.txt). But two extra counters (12 and 13) are
provided: total time spent reading and writing in milliseconds. All provided: total time spent reading and writing. All these counters may
these counters may be accessed by sending the @stats_print message to be accessed by sending the @stats_print message to the appropriate DM
the appropriate DM device via dmsetup. device via dmsetup.
By default, the reported times are in milliseconds and their granularity
depends on the kernel tick rate. When the precise_timestamps option is
used, the reported times are in nanoseconds.
Each region has a corresponding unique identifier, which we call a Each region has a corresponding unique identifier, which we call a
region_id, that is assigned when the region is created. The region_id region_id, that is assigned when the region is created. The region_id
@ -33,7 +37,9 @@ memory is used by reading
Messages Messages
======== ========
@stats_create <range> <step> [<program_id> [<aux_data>]] @stats_create <range> <step>
[<number_of_optional_arguments> <optional_arguments>...]
[<program_id> [<aux_data>]]
Create a new region and return the region_id. Create a new region and return the region_id.
@ -48,6 +54,17 @@ Messages
"/<number_of_areas>" - the range is subdivided into the specified "/<number_of_areas>" - the range is subdivided into the specified
number of areas. number of areas.
<number_of_optional_arguments>
The number of optional arguments
<optional_arguments>
The following optional arguments are supported:
precise_timestamps - use a precise timer with nanosecond resolution
instead of the "jiffies" variable. When this argument is
used, the resulting times are in nanoseconds instead of
milliseconds. Precise timestamps are slightly slower
to obtain than jiffies-based timestamps.
<program_id> <program_id>
An optional parameter. A name that uniquely identifies An optional parameter. A name that uniquely identifies
the userspace owner of the range. This groups ranges together the userspace owner of the range. This groups ranges together
@ -55,6 +72,9 @@ Messages
created and ignore those created by others. created and ignore those created by others.
The kernel returns this string back in the output of The kernel returns this string back in the output of
@stats_list message, but it doesn't use it for anything else. @stats_list message, but it doesn't use it for anything else.
If the number of optional arguments is omitted, the program id must
not be a number; otherwise it would be interpreted as the number of
optional arguments.
<aux_data> <aux_data>
An optional parameter. A word that provides auxiliary data An optional parameter. A word that provides auxiliary data

View File

@ -33,13 +33,14 @@ struct dm_stat_percpu {
struct dm_stat_shared { struct dm_stat_shared {
atomic_t in_flight[2]; atomic_t in_flight[2];
unsigned long stamp; unsigned long long stamp;
struct dm_stat_percpu tmp; struct dm_stat_percpu tmp;
}; };
struct dm_stat { struct dm_stat {
struct list_head list_entry; struct list_head list_entry;
int id; int id;
unsigned stat_flags;
size_t n_entries; size_t n_entries;
sector_t start; sector_t start;
sector_t end; sector_t end;
@ -53,6 +54,8 @@ struct dm_stat {
struct dm_stat_shared stat_shared[0]; struct dm_stat_shared stat_shared[0];
}; };
#define STAT_PRECISE_TIMESTAMPS 1
struct dm_stats_last_position { struct dm_stats_last_position {
sector_t last_sector; sector_t last_sector;
unsigned last_rw; unsigned last_rw;
@ -224,7 +227,8 @@ void dm_stats_cleanup(struct dm_stats *stats)
} }
static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end, static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
sector_t step, const char *program_id, const char *aux_data, sector_t step, unsigned stat_flags,
const char *program_id, const char *aux_data,
void (*suspend_callback)(struct mapped_device *), void (*suspend_callback)(struct mapped_device *),
void (*resume_callback)(struct mapped_device *), void (*resume_callback)(struct mapped_device *),
struct mapped_device *md) struct mapped_device *md)
@ -265,6 +269,7 @@ static int dm_stats_create(struct dm_stats *stats, sector_t start, sector_t end,
if (!s) if (!s)
return -ENOMEM; return -ENOMEM;
s->stat_flags = stat_flags;
s->n_entries = n_entries; s->n_entries = n_entries;
s->start = start; s->start = start;
s->end = end; s->end = end;
@ -414,18 +419,24 @@ static int dm_stats_list(struct dm_stats *stats, const char *program,
return 1; return 1;
} }
static void dm_stat_round(struct dm_stat_shared *shared, struct dm_stat_percpu *p) static void dm_stat_round(struct dm_stat *s, struct dm_stat_shared *shared,
struct dm_stat_percpu *p)
{ {
/* /*
* This is racy, but so is part_round_stats_single. * This is racy, but so is part_round_stats_single.
*/ */
unsigned long now = jiffies; unsigned long long now, difference;
unsigned in_flight_read; unsigned in_flight_read, in_flight_write;
unsigned in_flight_write;
unsigned long difference = now - shared->stamp;
if (likely(!(s->stat_flags & STAT_PRECISE_TIMESTAMPS)))
now = jiffies;
else
now = ktime_to_ns(ktime_get());
difference = now - shared->stamp;
if (!difference) if (!difference)
return; return;
in_flight_read = (unsigned)atomic_read(&shared->in_flight[READ]); in_flight_read = (unsigned)atomic_read(&shared->in_flight[READ]);
in_flight_write = (unsigned)atomic_read(&shared->in_flight[WRITE]); in_flight_write = (unsigned)atomic_read(&shared->in_flight[WRITE]);
if (in_flight_read) if (in_flight_read)
@ -440,8 +451,9 @@ static void dm_stat_round(struct dm_stat_shared *shared, struct dm_stat_percpu *
} }
static void dm_stat_for_entry(struct dm_stat *s, size_t entry, static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
unsigned long bi_rw, sector_t len, bool merged, unsigned long bi_rw, sector_t len,
bool end, unsigned long duration) struct dm_stats_aux *stats_aux, bool end,
unsigned long duration_jiffies)
{ {
unsigned long idx = bi_rw & REQ_WRITE; unsigned long idx = bi_rw & REQ_WRITE;
struct dm_stat_shared *shared = &s->stat_shared[entry]; struct dm_stat_shared *shared = &s->stat_shared[entry];
@ -471,15 +483,18 @@ static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
p = &s->stat_percpu[smp_processor_id()][entry]; p = &s->stat_percpu[smp_processor_id()][entry];
if (!end) { if (!end) {
dm_stat_round(shared, p); dm_stat_round(s, shared, p);
atomic_inc(&shared->in_flight[idx]); atomic_inc(&shared->in_flight[idx]);
} else { } else {
dm_stat_round(shared, p); dm_stat_round(s, shared, p);
atomic_dec(&shared->in_flight[idx]); atomic_dec(&shared->in_flight[idx]);
p->sectors[idx] += len; p->sectors[idx] += len;
p->ios[idx] += 1; p->ios[idx] += 1;
p->merges[idx] += merged; p->merges[idx] += stats_aux->merged;
p->ticks[idx] += duration; if (!(s->stat_flags & STAT_PRECISE_TIMESTAMPS))
p->ticks[idx] += duration_jiffies;
else
p->ticks[idx] += stats_aux->duration_ns;
} }
#if BITS_PER_LONG == 32 #if BITS_PER_LONG == 32
@ -491,7 +506,7 @@ static void dm_stat_for_entry(struct dm_stat *s, size_t entry,
static void __dm_stat_bio(struct dm_stat *s, unsigned long bi_rw, static void __dm_stat_bio(struct dm_stat *s, unsigned long bi_rw,
sector_t bi_sector, sector_t end_sector, sector_t bi_sector, sector_t end_sector,
bool end, unsigned long duration, bool end, unsigned long duration_jiffies,
struct dm_stats_aux *stats_aux) struct dm_stats_aux *stats_aux)
{ {
sector_t rel_sector, offset, todo, fragment_len; sector_t rel_sector, offset, todo, fragment_len;
@ -520,7 +535,7 @@ static void __dm_stat_bio(struct dm_stat *s, unsigned long bi_rw,
if (fragment_len > s->step - offset) if (fragment_len > s->step - offset)
fragment_len = s->step - offset; fragment_len = s->step - offset;
dm_stat_for_entry(s, entry, bi_rw, fragment_len, dm_stat_for_entry(s, entry, bi_rw, fragment_len,
stats_aux->merged, end, duration); stats_aux, end, duration_jiffies);
todo -= fragment_len; todo -= fragment_len;
entry++; entry++;
offset = 0; offset = 0;
@ -529,11 +544,13 @@ static void __dm_stat_bio(struct dm_stat *s, unsigned long bi_rw,
void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw, void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw,
sector_t bi_sector, unsigned bi_sectors, bool end, sector_t bi_sector, unsigned bi_sectors, bool end,
unsigned long duration, struct dm_stats_aux *stats_aux) unsigned long duration_jiffies,
struct dm_stats_aux *stats_aux)
{ {
struct dm_stat *s; struct dm_stat *s;
sector_t end_sector; sector_t end_sector;
struct dm_stats_last_position *last; struct dm_stats_last_position *last;
bool got_precise_time;
if (unlikely(!bi_sectors)) if (unlikely(!bi_sectors))
return; return;
@ -557,8 +574,17 @@ void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw,
rcu_read_lock(); rcu_read_lock();
list_for_each_entry_rcu(s, &stats->list, list_entry) got_precise_time = false;
__dm_stat_bio(s, bi_rw, bi_sector, end_sector, end, duration, stats_aux); list_for_each_entry_rcu(s, &stats->list, list_entry) {
if (s->stat_flags & STAT_PRECISE_TIMESTAMPS && !got_precise_time) {
if (!end)
stats_aux->duration_ns = ktime_to_ns(ktime_get());
else
stats_aux->duration_ns = ktime_to_ns(ktime_get()) - stats_aux->duration_ns;
got_precise_time = true;
}
__dm_stat_bio(s, bi_rw, bi_sector, end_sector, end, duration_jiffies, stats_aux);
}
rcu_read_unlock(); rcu_read_unlock();
} }
@ -571,7 +597,7 @@ static void __dm_stat_init_temporary_percpu_totals(struct dm_stat_shared *shared
local_irq_disable(); local_irq_disable();
p = &s->stat_percpu[smp_processor_id()][x]; p = &s->stat_percpu[smp_processor_id()][x];
dm_stat_round(shared, p); dm_stat_round(s, shared, p);
local_irq_enable(); local_irq_enable();
memset(&shared->tmp, 0, sizeof(shared->tmp)); memset(&shared->tmp, 0, sizeof(shared->tmp));
@ -643,11 +669,15 @@ static int dm_stats_clear(struct dm_stats *stats, int id)
/* /*
* This is like jiffies_to_msec, but works for 64-bit values. * This is like jiffies_to_msec, but works for 64-bit values.
*/ */
static unsigned long long dm_jiffies_to_msec64(unsigned long long j) static unsigned long long dm_jiffies_to_msec64(struct dm_stat *s, unsigned long long j)
{ {
unsigned long long result = 0; unsigned long long result;
unsigned mult; unsigned mult;
if (s->stat_flags & STAT_PRECISE_TIMESTAMPS)
return j;
result = 0;
if (j) if (j)
result = jiffies_to_msecs(j & 0x3fffff); result = jiffies_to_msecs(j & 0x3fffff);
if (j >= 1 << 22) { if (j >= 1 << 22) {
@ -709,16 +739,16 @@ static int dm_stats_print(struct dm_stats *stats, int id,
shared->tmp.ios[READ], shared->tmp.ios[READ],
shared->tmp.merges[READ], shared->tmp.merges[READ],
shared->tmp.sectors[READ], shared->tmp.sectors[READ],
dm_jiffies_to_msec64(shared->tmp.ticks[READ]), dm_jiffies_to_msec64(s, shared->tmp.ticks[READ]),
shared->tmp.ios[WRITE], shared->tmp.ios[WRITE],
shared->tmp.merges[WRITE], shared->tmp.merges[WRITE],
shared->tmp.sectors[WRITE], shared->tmp.sectors[WRITE],
dm_jiffies_to_msec64(shared->tmp.ticks[WRITE]), dm_jiffies_to_msec64(s, shared->tmp.ticks[WRITE]),
dm_stat_in_flight(shared), dm_stat_in_flight(shared),
dm_jiffies_to_msec64(shared->tmp.io_ticks_total), dm_jiffies_to_msec64(s, shared->tmp.io_ticks_total),
dm_jiffies_to_msec64(shared->tmp.time_in_queue), dm_jiffies_to_msec64(s, shared->tmp.time_in_queue),
dm_jiffies_to_msec64(shared->tmp.io_ticks[READ]), dm_jiffies_to_msec64(s, shared->tmp.io_ticks[READ]),
dm_jiffies_to_msec64(shared->tmp.io_ticks[WRITE])); dm_jiffies_to_msec64(s, shared->tmp.io_ticks[WRITE]));
if (unlikely(sz + 1 >= maxlen)) if (unlikely(sz + 1 >= maxlen))
goto buffer_overflow; goto buffer_overflow;
@ -769,21 +799,31 @@ static int message_stats_create(struct mapped_device *md,
unsigned long long start, end, len, step; unsigned long long start, end, len, step;
unsigned divisor; unsigned divisor;
const char *program_id, *aux_data; const char *program_id, *aux_data;
unsigned stat_flags = 0;
struct dm_arg_set as, as_backup;
const char *a;
unsigned feature_args;
/* /*
* Input format: * Input format:
* <range> <step> [<program_id> [<aux_data>]] * <range> <step> [<extra_parameters> <parameters>] [<program_id> [<aux_data>]]
*/ */
if (argc < 3 || argc > 5) if (argc < 3)
return -EINVAL; return -EINVAL;
if (!strcmp(argv[1], "-")) { as.argc = argc;
as.argv = argv;
dm_consume_args(&as, 1);
a = dm_shift_arg(&as);
if (!strcmp(a, "-")) {
start = 0; start = 0;
len = dm_get_size(md); len = dm_get_size(md);
if (!len) if (!len)
len = 1; len = 1;
} else if (sscanf(argv[1], "%llu+%llu%c", &start, &len, &dummy) != 2 || } else if (sscanf(a, "%llu+%llu%c", &start, &len, &dummy) != 2 ||
start != (sector_t)start || len != (sector_t)len) start != (sector_t)start || len != (sector_t)len)
return -EINVAL; return -EINVAL;
@ -791,7 +831,8 @@ static int message_stats_create(struct mapped_device *md,
if (start >= end) if (start >= end)
return -EINVAL; return -EINVAL;
if (sscanf(argv[2], "/%u%c", &divisor, &dummy) == 1) { a = dm_shift_arg(&as);
if (sscanf(a, "/%u%c", &divisor, &dummy) == 1) {
if (!divisor) if (!divisor)
return -EINVAL; return -EINVAL;
step = end - start; step = end - start;
@ -799,18 +840,39 @@ static int message_stats_create(struct mapped_device *md,
step++; step++;
if (!step) if (!step)
step = 1; step = 1;
} else if (sscanf(argv[2], "%llu%c", &step, &dummy) != 1 || } else if (sscanf(a, "%llu%c", &step, &dummy) != 1 ||
step != (sector_t)step || !step) step != (sector_t)step || !step)
return -EINVAL; return -EINVAL;
as_backup = as;
a = dm_shift_arg(&as);
if (a && sscanf(a, "%u%c", &feature_args, &dummy) == 1) {
while (feature_args--) {
a = dm_shift_arg(&as);
if (!a)
return -EINVAL;
if (!strcasecmp(a, "precise_timestamps"))
stat_flags |= STAT_PRECISE_TIMESTAMPS;
else
return -EINVAL;
}
} else {
as = as_backup;
}
program_id = "-"; program_id = "-";
aux_data = "-"; aux_data = "-";
if (argc > 3) a = dm_shift_arg(&as);
program_id = argv[3]; if (a)
program_id = a;
if (argc > 4) a = dm_shift_arg(&as);
aux_data = argv[4]; if (a)
aux_data = a;
if (as.argc)
return -EINVAL;
/* /*
* If a buffer overflow happens after we created the region, * If a buffer overflow happens after we created the region,
@ -822,7 +884,7 @@ static int message_stats_create(struct mapped_device *md,
if (dm_message_test_buffer_overflow(result, maxlen)) if (dm_message_test_buffer_overflow(result, maxlen))
return 1; return 1;
id = dm_stats_create(dm_get_stats(md), start, end, step, program_id, aux_data, id = dm_stats_create(dm_get_stats(md), start, end, step, stat_flags, program_id, aux_data,
dm_internal_suspend_fast, dm_internal_resume_fast, md); dm_internal_suspend_fast, dm_internal_resume_fast, md);
if (id < 0) if (id < 0)
return id; return id;

View File

@ -18,6 +18,7 @@ struct dm_stats {
struct dm_stats_aux { struct dm_stats_aux {
bool merged; bool merged;
unsigned long long duration_ns;
}; };
void dm_stats_init(struct dm_stats *st); void dm_stats_init(struct dm_stats *st);
@ -30,7 +31,8 @@ int dm_stats_message(struct mapped_device *md, unsigned argc, char **argv,
void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw, void dm_stats_account_io(struct dm_stats *stats, unsigned long bi_rw,
sector_t bi_sector, unsigned bi_sectors, bool end, sector_t bi_sector, unsigned bi_sectors, bool end,
unsigned long duration, struct dm_stats_aux *aux); unsigned long duration_jiffies,
struct dm_stats_aux *aux);
static inline bool dm_stats_used(struct dm_stats *st) static inline bool dm_stats_used(struct dm_stats *st)
{ {