perf stat: Add support to measure SMI cost
Implement a new --smi-cost mode in perf stat to measure SMI cost. During the measurement, /sys/devices/cpu/freeze_on_smi will be set. The measurement can be done with one counter (unhalted core cycles) and two free-running MSR counters (IA32_APERF and SMI_COUNT). In practice, the percentage of SMI core cycles should be more useful than the absolute value, so the output will be the percentage of SMI core cycles and SMI#. metric_only will be set by default. SMI cycles% = (aperf - unhalted core cycles) / aperf Here is an example output. Performance counter stats for 'sudo echo ': SMI cycles% SMI# 0.1% 1 0.010858678 seconds time elapsed Users who want to get the actual value can additionally apply --no-metric-only. Signed-off-by: Kan Liang <Kan.liang@intel.com> Acked-by: Jiri Olsa <jolsa@kernel.org> Cc: Andi Kleen <ak@linux.intel.com> Cc: Kan Liang <kan.liang@intel.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Robert Elliott <elliott@hpe.com> Cc: Stephane Eranian <eranian@google.com> Cc: Thomas Gleixner <tglx@linutronix.de> Link: http://lkml.kernel.org/r/1495825538-5230-3-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
parent
3b00ea9386
commit
daefd0bc0b
|
@ -239,6 +239,20 @@ taskset.
|
||||||
--no-merge::
|
--no-merge::
|
||||||
Do not merge results from same PMUs.
|
Do not merge results from same PMUs.
|
||||||
|
|
||||||
|
--smi-cost::
|
||||||
|
Measure SMI cost if msr/aperf/ and msr/smi/ events are supported.
|
||||||
|
|
||||||
|
During the measurement, /sys/devices/cpu/freeze_on_smi will be set to
|
||||||
|
freeze core counters on SMI.
|
||||||
|
The aperf counter will not be affected by the setting.
|
||||||
|
The cost of SMI can be measured by (aperf - unhalted core cycles).
|
||||||
|
|
||||||
|
In practice, the percentage of SMI cycles is very useful for performance
|
||||||
|
oriented analysis. --metric-only will be applied by default.
|
||||||
|
The output is SMI cycles%, which equals (aperf - unhalted core cycles) / aperf
|
||||||
|
|
||||||
|
Users who want to get the actual value can apply --no-metric-only.
|
||||||
|
|
||||||
EXAMPLES
|
EXAMPLES
|
||||||
--------
|
--------
|
||||||
|
|
||||||
|
|
|
@ -86,6 +86,7 @@
|
||||||
#define DEFAULT_SEPARATOR " "
|
#define DEFAULT_SEPARATOR " "
|
||||||
#define CNTR_NOT_SUPPORTED "<not supported>"
|
#define CNTR_NOT_SUPPORTED "<not supported>"
|
||||||
#define CNTR_NOT_COUNTED "<not counted>"
|
#define CNTR_NOT_COUNTED "<not counted>"
|
||||||
|
#define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi"
|
||||||
|
|
||||||
static void print_counters(struct timespec *ts, int argc, const char **argv);
|
static void print_counters(struct timespec *ts, int argc, const char **argv);
|
||||||
|
|
||||||
|
@ -122,6 +123,14 @@ static const char * topdown_attrs[] = {
|
||||||
NULL,
|
NULL,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static const char *smi_cost_attrs = {
|
||||||
|
"{"
|
||||||
|
"msr/aperf/,"
|
||||||
|
"msr/smi/,"
|
||||||
|
"cycles"
|
||||||
|
"}"
|
||||||
|
};
|
||||||
|
|
||||||
static struct perf_evlist *evsel_list;
|
static struct perf_evlist *evsel_list;
|
||||||
|
|
||||||
static struct target target = {
|
static struct target target = {
|
||||||
|
@ -137,6 +146,8 @@ static bool null_run = false;
|
||||||
static int detailed_run = 0;
|
static int detailed_run = 0;
|
||||||
static bool transaction_run;
|
static bool transaction_run;
|
||||||
static bool topdown_run = false;
|
static bool topdown_run = false;
|
||||||
|
static bool smi_cost = false;
|
||||||
|
static bool smi_reset = false;
|
||||||
static bool big_num = true;
|
static bool big_num = true;
|
||||||
static int big_num_opt = -1;
|
static int big_num_opt = -1;
|
||||||
static const char *csv_sep = NULL;
|
static const char *csv_sep = NULL;
|
||||||
|
@ -1782,6 +1793,8 @@ static const struct option stat_options[] = {
|
||||||
"Only print computed metrics. No raw values", enable_metric_only),
|
"Only print computed metrics. No raw values", enable_metric_only),
|
||||||
OPT_BOOLEAN(0, "topdown", &topdown_run,
|
OPT_BOOLEAN(0, "topdown", &topdown_run,
|
||||||
"measure topdown level 1 statistics"),
|
"measure topdown level 1 statistics"),
|
||||||
|
OPT_BOOLEAN(0, "smi-cost", &smi_cost,
|
||||||
|
"measure SMI cost"),
|
||||||
OPT_END()
|
OPT_END()
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -2160,6 +2173,39 @@ static int add_default_attributes(void)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (smi_cost) {
|
||||||
|
int smi;
|
||||||
|
|
||||||
|
if (sysfs__read_int(FREEZE_ON_SMI_PATH, &smi) < 0) {
|
||||||
|
fprintf(stderr, "freeze_on_smi is not supported.\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!smi) {
|
||||||
|
if (sysfs__write_int(FREEZE_ON_SMI_PATH, 1) < 0) {
|
||||||
|
fprintf(stderr, "Failed to set freeze_on_smi.\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
smi_reset = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pmu_have_event("msr", "aperf") &&
|
||||||
|
pmu_have_event("msr", "smi")) {
|
||||||
|
if (!force_metric_only)
|
||||||
|
metric_only = true;
|
||||||
|
err = parse_events(evsel_list, smi_cost_attrs, NULL);
|
||||||
|
} else {
|
||||||
|
fprintf(stderr, "To measure SMI cost, it needs "
|
||||||
|
"msr/aperf/, msr/smi/ and cpu/cycles/ support\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (err) {
|
||||||
|
fprintf(stderr, "Cannot set up SMI cost events\n");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
if (topdown_run) {
|
if (topdown_run) {
|
||||||
char *str = NULL;
|
char *str = NULL;
|
||||||
bool warn = false;
|
bool warn = false;
|
||||||
|
@ -2742,6 +2788,9 @@ int cmd_stat(int argc, const char **argv)
|
||||||
perf_stat__exit_aggr_mode();
|
perf_stat__exit_aggr_mode();
|
||||||
perf_evlist__free_stats(evsel_list);
|
perf_evlist__free_stats(evsel_list);
|
||||||
out:
|
out:
|
||||||
|
if (smi_cost && smi_reset)
|
||||||
|
sysfs__write_int(FREEZE_ON_SMI_PATH, 0);
|
||||||
|
|
||||||
perf_evlist__delete(evsel_list);
|
perf_evlist__delete(evsel_list);
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
|
|
|
@ -44,6 +44,8 @@ static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS];
|
||||||
static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS];
|
static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS];
|
||||||
static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS];
|
static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS];
|
||||||
static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS];
|
static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS];
|
||||||
|
static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS];
|
||||||
|
static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS];
|
||||||
static struct rblist runtime_saved_values;
|
static struct rblist runtime_saved_values;
|
||||||
static bool have_frontend_stalled;
|
static bool have_frontend_stalled;
|
||||||
|
|
||||||
|
@ -157,6 +159,8 @@ void perf_stat__reset_shadow_stats(void)
|
||||||
memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued));
|
memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued));
|
||||||
memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles));
|
memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles));
|
||||||
memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles));
|
memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles));
|
||||||
|
memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats));
|
||||||
|
memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats));
|
||||||
|
|
||||||
next = rb_first(&runtime_saved_values.entries);
|
next = rb_first(&runtime_saved_values.entries);
|
||||||
while (next) {
|
while (next) {
|
||||||
|
@ -217,6 +221,10 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
|
||||||
update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
|
update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
|
||||||
else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
|
else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
|
||||||
update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
|
update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
|
||||||
|
else if (perf_stat_evsel__is(counter, SMI_NUM))
|
||||||
|
update_stats(&runtime_smi_num_stats[ctx][cpu], count[0]);
|
||||||
|
else if (perf_stat_evsel__is(counter, APERF))
|
||||||
|
update_stats(&runtime_aperf_stats[ctx][cpu], count[0]);
|
||||||
|
|
||||||
if (counter->collect_stat) {
|
if (counter->collect_stat) {
|
||||||
struct saved_value *v = saved_value_lookup(counter, cpu, ctx,
|
struct saved_value *v = saved_value_lookup(counter, cpu, ctx,
|
||||||
|
@ -592,6 +600,29 @@ static double td_be_bound(int ctx, int cpu)
|
||||||
return sanitize_val(1.0 - sum);
|
return sanitize_val(1.0 - sum);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void print_smi_cost(int cpu, struct perf_evsel *evsel,
|
||||||
|
struct perf_stat_output_ctx *out)
|
||||||
|
{
|
||||||
|
double smi_num, aperf, cycles, cost = 0.0;
|
||||||
|
int ctx = evsel_context(evsel);
|
||||||
|
const char *color = NULL;
|
||||||
|
|
||||||
|
smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]);
|
||||||
|
aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]);
|
||||||
|
cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]);
|
||||||
|
|
||||||
|
if ((cycles == 0) || (aperf == 0))
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (smi_num)
|
||||||
|
cost = (aperf - cycles) / aperf * 100.00;
|
||||||
|
|
||||||
|
if (cost > 10)
|
||||||
|
color = PERF_COLOR_RED;
|
||||||
|
out->print_metric(out->ctx, color, "%8.1f%%", "SMI cycles%", cost);
|
||||||
|
out->print_metric(out->ctx, NULL, "%4.0f", "SMI#", smi_num);
|
||||||
|
}
|
||||||
|
|
||||||
void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
|
void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
|
||||||
double avg, int cpu,
|
double avg, int cpu,
|
||||||
struct perf_stat_output_ctx *out)
|
struct perf_stat_output_ctx *out)
|
||||||
|
@ -825,6 +856,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel,
|
||||||
}
|
}
|
||||||
snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
|
snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit);
|
||||||
print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
|
print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio);
|
||||||
|
} else if (perf_stat_evsel__is(evsel, SMI_NUM)) {
|
||||||
|
print_smi_cost(cpu, evsel, out);
|
||||||
} else {
|
} else {
|
||||||
print_metric(ctxp, NULL, NULL, NULL, 0);
|
print_metric(ctxp, NULL, NULL, NULL, 0);
|
||||||
}
|
}
|
||||||
|
|
|
@ -86,6 +86,8 @@ static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
|
||||||
ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
|
ID(TOPDOWN_SLOTS_RETIRED, topdown-slots-retired),
|
||||||
ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
|
ID(TOPDOWN_FETCH_BUBBLES, topdown-fetch-bubbles),
|
||||||
ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
|
ID(TOPDOWN_RECOVERY_BUBBLES, topdown-recovery-bubbles),
|
||||||
|
ID(SMI_NUM, msr/smi/),
|
||||||
|
ID(APERF, msr/aperf/),
|
||||||
};
|
};
|
||||||
#undef ID
|
#undef ID
|
||||||
|
|
||||||
|
|
|
@ -22,6 +22,8 @@ enum perf_stat_evsel_id {
|
||||||
PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED,
|
PERF_STAT_EVSEL_ID__TOPDOWN_SLOTS_RETIRED,
|
||||||
PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES,
|
PERF_STAT_EVSEL_ID__TOPDOWN_FETCH_BUBBLES,
|
||||||
PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES,
|
PERF_STAT_EVSEL_ID__TOPDOWN_RECOVERY_BUBBLES,
|
||||||
|
PERF_STAT_EVSEL_ID__SMI_NUM,
|
||||||
|
PERF_STAT_EVSEL_ID__APERF,
|
||||||
PERF_STAT_EVSEL_ID__MAX,
|
PERF_STAT_EVSEL_ID__MAX,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue