Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf fixes from Thomas Gleixner:
 "A pile of perf related fixes:

  Kernel:
   - Fix SLOTS PEBS event constraints for Icelake CPUs
   - Add the missing mask bit to allow counting hardware generated prefetches on L3 for Icelake CPUs
   - Make the test for hypervisor platforms more accurate (as far as possible)
   - Handle PMUs correctly which override event->cpu
   - Yet another missing fallthrough annotation

  Tools:
   perf.data:
    - Fix loading of compressed data split across adjacent records
    - Fix buffer size setting for processing CPU topology perf.data header.

   perf stat:
    - Fix segfault for event group in repeat mode
    - Always separate "stalled cycles per insn" line, it was being appended to the "instructions" line.

   perf script:
    - Fix --max-blocks man page description.
    - Improve man page description of metrics.
    - Fix off by one in brstackinsn IPC computation.

   perf probe:
    - Avoid calling freeing routine multiple times for same pointer.

   perf build:
    - Do not use -Wshadow on gcc < 4.8, avoiding too strict warnings treated as errors, breaking the build"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/intel: Mark expected switch fall-throughs
  perf/core: Fix creating kernel counters for PMUs that override event->cpu
  perf/x86: Apply more accurate check on hypervisor platform
  perf/x86/intel: Fix invalid Bit 13 for Icelake MSR_OFFCORE_RSP_x register
  perf/x86/intel: Fix SLOTS PEBS event constraint
  perf build: Do not use -Wshadow on gcc < 4.8
  perf probe: Avoid calling freeing routine multiple times for same pointer
  perf probe: Set pev->nargs to zero after freeing pev->args entries
  perf session: Fix loading of compressed data split across adjacent records
  perf stat: Always separate stalled cycles per insn
  perf stat: Fix segfault for event group in repeat mode
  perf tools: Fix proper buffer size for feature processing
  perf script: Fix off by one in brstackinsn IPC computation
  perf script: Improve man page description of metrics
  perf script: Fix --max-blocks man page description
This commit is contained in:
commit
750991f9af
|
@ -20,7 +20,6 @@
|
|||
#include <asm/intel-family.h>
|
||||
#include <asm/apic.h>
|
||||
#include <asm/cpu_device_id.h>
|
||||
#include <asm/hypervisor.h>
|
||||
|
||||
#include "../perf_event.h"
|
||||
|
||||
|
@ -263,8 +262,8 @@ static struct event_constraint intel_icl_event_constraints[] = {
|
|||
};
|
||||
|
||||
static struct extra_reg intel_icl_extra_regs[] __read_mostly = {
|
||||
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff9fffull, RSP_0),
|
||||
INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff9fffull, RSP_1),
|
||||
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffffbfffull, RSP_0),
|
||||
INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffffbfffull, RSP_1),
|
||||
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
|
||||
INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
|
||||
EVENT_EXTRA_END
|
||||
|
@ -4053,7 +4052,7 @@ static bool check_msr(unsigned long msr, u64 mask)
|
|||
* Disable the check for real HW, so we don't
|
||||
* mess with potentially enabled registers:
|
||||
*/
|
||||
if (hypervisor_is_type(X86_HYPER_NATIVE))
|
||||
if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
|
||||
return true;
|
||||
|
||||
/*
|
||||
|
|
|
@ -851,7 +851,7 @@ struct event_constraint intel_skl_pebs_event_constraints[] = {
|
|||
|
||||
struct event_constraint intel_icl_pebs_event_constraints[] = {
|
||||
INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
|
||||
INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x400000000ULL), /* SLOTS */
|
||||
INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), /* SLOTS */
|
||||
|
||||
INTEL_PLD_CONSTRAINT(0x1cd, 0xff), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
|
||||
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf), /* MEM_INST_RETIRED.LOAD */
|
||||
|
|
|
@ -11274,7 +11274,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
|
|||
goto err_unlock;
|
||||
}
|
||||
|
||||
perf_install_in_context(ctx, event, cpu);
|
||||
perf_install_in_context(ctx, event, event->cpu);
|
||||
perf_unpin_context(ctx);
|
||||
mutex_unlock(&ctx->mutex);
|
||||
|
||||
|
|
|
@ -228,11 +228,11 @@ OPTIONS
|
|||
|
||||
With the metric option perf script can compute metrics for
|
||||
sampling periods, similar to perf stat. This requires
|
||||
specifying a group with multiple metrics with the :S option
|
||||
specifying a group with multiple events defining metrics with the :S option
|
||||
for perf record. perf will sample on the first event, and
|
||||
compute metrics for all the events in the group. Please note
|
||||
print computed metrics for all the events in the group. Please note
|
||||
that the metric computed is averaged over the whole sampling
|
||||
period, not just for the sample point.
|
||||
period (since the last sample), not just for the sample point.
|
||||
|
||||
For sample events it's possible to display misc field with -F +misc option,
|
||||
following letters are displayed for each bit:
|
||||
|
@ -384,7 +384,7 @@ include::itrace.txt[]
|
|||
perf script --time 0%-10%,30%-40%
|
||||
|
||||
--max-blocks::
|
||||
Set the maximum number of program blocks to print with brstackasm for
|
||||
Set the maximum number of program blocks to print with brstackinsn for
|
||||
each sample.
|
||||
|
||||
--reltime::
|
||||
|
|
|
@ -698,6 +698,16 @@ __cmd_probe(int argc, const char **argv)
|
|||
|
||||
ret = perf_add_probe_events(params.events, params.nevents);
|
||||
if (ret < 0) {
|
||||
|
||||
/*
|
||||
* When perf_add_probe_events() fails it calls
|
||||
* cleanup_perf_probe_events(pevs, npevs), i.e.
|
||||
* cleanup_perf_probe_events(params.events, params.nevents), which
|
||||
* will call clear_perf_probe_event(), so set nevents to zero
|
||||
* so that cleanup_params() does not call clear_perf_probe_event() again
|
||||
* on the same pevs.
|
||||
*/
|
||||
params.nevents = 0;
|
||||
pr_err_with_code(" Error: Failed to add events.", ret);
|
||||
return ret;
|
||||
}
|
||||
|
|
|
@ -1059,7 +1059,7 @@ static int perf_sample__fprintf_brstackinsn(struct perf_sample *sample,
|
|||
|
||||
printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
|
||||
if (ip == end) {
|
||||
printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn, fp,
|
||||
printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, ++insn, fp,
|
||||
&total_cycles);
|
||||
if (PRINT_FIELD(SRCCODE))
|
||||
printed += print_srccode(thread, x.cpumode, ip);
|
||||
|
|
|
@ -607,7 +607,13 @@ try_again:
|
|||
* group leaders.
|
||||
*/
|
||||
read_counters(&(struct timespec) { .tv_nsec = t1-t0 });
|
||||
perf_evlist__close(evsel_list);
|
||||
|
||||
/*
|
||||
* We need to keep evsel_list alive, because it's processed
|
||||
* later; the evsel_list will be closed afterwards.
|
||||
*/
|
||||
if (!STAT_RECORD)
|
||||
perf_evlist__close(evsel_list);
|
||||
|
||||
return WEXITSTATUS(status);
|
||||
}
|
||||
|
@ -1997,6 +2003,7 @@ int cmd_stat(int argc, const char **argv)
|
|||
perf_session__write_header(perf_stat.session, evsel_list, fd, true);
|
||||
}
|
||||
|
||||
perf_evlist__close(evsel_list);
|
||||
perf_session__delete(perf_stat.session);
|
||||
}
|
||||
|
||||
|
|
|
@ -1291,6 +1291,7 @@ static void perf_evsel__free_id(struct perf_evsel *evsel)
|
|||
xyarray__delete(evsel->sample_id);
|
||||
evsel->sample_id = NULL;
|
||||
zfree(&evsel->id);
|
||||
evsel->ids = 0;
|
||||
}
|
||||
|
||||
static void perf_evsel__free_config_terms(struct perf_evsel *evsel)
|
||||
|
@ -2077,6 +2078,7 @@ void perf_evsel__close(struct perf_evsel *evsel)
|
|||
|
||||
perf_evsel__close_fd(evsel);
|
||||
perf_evsel__free_fd(evsel);
|
||||
perf_evsel__free_id(evsel);
|
||||
}
|
||||
|
||||
int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
|
||||
|
|
|
@ -3747,7 +3747,7 @@ int perf_event__process_feature(struct perf_session *session,
|
|||
return 0;
|
||||
|
||||
ff.buf = (void *)fe->data;
|
||||
ff.size = event->header.size - sizeof(event->header);
|
||||
ff.size = event->header.size - sizeof(*fe);
|
||||
ff.ph = &session->header;
|
||||
|
||||
if (feat_ops[feat].process(&ff, NULL))
|
||||
|
|
|
@ -2230,6 +2230,7 @@ void clear_perf_probe_event(struct perf_probe_event *pev)
|
|||
field = next;
|
||||
}
|
||||
}
|
||||
pev->nargs = 0;
|
||||
zfree(&pev->args);
|
||||
}
|
||||
|
||||
|
|
|
@ -36,10 +36,16 @@ static int perf_session__process_compressed_event(struct perf_session *session,
|
|||
void *src;
|
||||
size_t decomp_size, src_size;
|
||||
u64 decomp_last_rem = 0;
|
||||
size_t decomp_len = session->header.env.comp_mmap_len;
|
||||
size_t mmap_len, decomp_len = session->header.env.comp_mmap_len;
|
||||
struct decomp *decomp, *decomp_last = session->decomp_last;
|
||||
|
||||
decomp = mmap(NULL, sizeof(struct decomp) + decomp_len, PROT_READ|PROT_WRITE,
|
||||
if (decomp_last) {
|
||||
decomp_last_rem = decomp_last->size - decomp_last->head;
|
||||
decomp_len += decomp_last_rem;
|
||||
}
|
||||
|
||||
mmap_len = sizeof(struct decomp) + decomp_len;
|
||||
decomp = mmap(NULL, mmap_len, PROT_READ|PROT_WRITE,
|
||||
MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
|
||||
if (decomp == MAP_FAILED) {
|
||||
pr_err("Couldn't allocate memory for decompression\n");
|
||||
|
@ -47,10 +53,10 @@ static int perf_session__process_compressed_event(struct perf_session *session,
|
|||
}
|
||||
|
||||
decomp->file_pos = file_offset;
|
||||
decomp->mmap_len = mmap_len;
|
||||
decomp->head = 0;
|
||||
|
||||
if (decomp_last) {
|
||||
decomp_last_rem = decomp_last->size - decomp_last->head;
|
||||
if (decomp_last_rem) {
|
||||
memcpy(decomp->data, &(decomp_last->data[decomp_last->head]), decomp_last_rem);
|
||||
decomp->size = decomp_last_rem;
|
||||
}
|
||||
|
@ -61,7 +67,7 @@ static int perf_session__process_compressed_event(struct perf_session *session,
|
|||
decomp_size = zstd_decompress_stream(&(session->zstd_data), src, src_size,
|
||||
&(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem);
|
||||
if (!decomp_size) {
|
||||
munmap(decomp, sizeof(struct decomp) + decomp_len);
|
||||
munmap(decomp, mmap_len);
|
||||
pr_err("Couldn't decompress data\n");
|
||||
return -1;
|
||||
}
|
||||
|
@ -255,15 +261,15 @@ static void perf_session__delete_threads(struct perf_session *session)
|
|||
static void perf_session__release_decomp_events(struct perf_session *session)
|
||||
{
|
||||
struct decomp *next, *decomp;
|
||||
size_t decomp_len;
|
||||
size_t mmap_len;
|
||||
next = session->decomp;
|
||||
decomp_len = session->header.env.comp_mmap_len;
|
||||
do {
|
||||
decomp = next;
|
||||
if (decomp == NULL)
|
||||
break;
|
||||
next = decomp->next;
|
||||
munmap(decomp, decomp_len + sizeof(struct decomp));
|
||||
mmap_len = decomp->mmap_len;
|
||||
munmap(decomp, mmap_len);
|
||||
} while (1);
|
||||
}
|
||||
|
||||
|
|
|
@ -46,6 +46,7 @@ struct perf_session {
|
|||
struct decomp {
|
||||
struct decomp *next;
|
||||
u64 file_pos;
|
||||
size_t mmap_len;
|
||||
u64 head;
|
||||
size_t size;
|
||||
char data[];
|
||||
|
|
|
@ -819,7 +819,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
|
|||
"stalled cycles per insn",
|
||||
ratio);
|
||||
} else if (have_frontend_stalled) {
|
||||
print_metric(config, ctxp, NULL, NULL,
|
||||
out->new_line(config, ctxp);
|
||||
print_metric(config, ctxp, NULL, "%7.2f ",
|
||||
"stalled cycles per insn", 0);
|
||||
}
|
||||
} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
|
||||
|
|
|
@ -99,8 +99,8 @@ size_t zstd_decompress_stream(struct zstd_data *data, void *src, size_t src_size
|
|||
while (input.pos < input.size) {
|
||||
ret = ZSTD_decompressStream(data->dstream, &output, &input);
|
||||
if (ZSTD_isError(ret)) {
|
||||
pr_err("failed to decompress (B): %ld -> %ld : %s\n",
|
||||
src_size, output.size, ZSTD_getErrorName(ret));
|
||||
pr_err("failed to decompress (B): %ld -> %ld, dst_size %ld : %s\n",
|
||||
src_size, output.size, dst_size, ZSTD_getErrorName(ret));
|
||||
break;
|
||||
}
|
||||
output.dst = dst + output.pos;
|
||||
|
|
|
@ -32,7 +32,6 @@ EXTRA_WARNINGS += -Wno-system-headers
|
|||
EXTRA_WARNINGS += -Wold-style-definition
|
||||
EXTRA_WARNINGS += -Wpacked
|
||||
EXTRA_WARNINGS += -Wredundant-decls
|
||||
EXTRA_WARNINGS += -Wshadow
|
||||
EXTRA_WARNINGS += -Wstrict-prototypes
|
||||
EXTRA_WARNINGS += -Wswitch-default
|
||||
EXTRA_WARNINGS += -Wswitch-enum
|
||||
|
@ -69,8 +68,16 @@ endif
|
|||
# will do for now and keep the above -Wstrict-aliasing=3 in place
|
||||
# in newer systems.
|
||||
# Needed for the __raw_cmpxchg in tools/arch/x86/include/asm/cmpxchg.h
|
||||
#
|
||||
# See https://lkml.org/lkml/2006/11/28/253 and https://gcc.gnu.org/gcc-4.8/changes.html,
|
||||
# that takes into account Linus's comments (search for Wshadow) for the reasoning about
|
||||
# -Wshadow not being interesting before gcc 4.8.
|
||||
|
||||
ifneq ($(filter 3.%,$(MAKE_VERSION)),) # make-3
|
||||
EXTRA_WARNINGS += -fno-strict-aliasing
|
||||
EXTRA_WARNINGS += -Wno-shadow
|
||||
else
|
||||
EXTRA_WARNINGS += -Wshadow
|
||||
endif
|
||||
|
||||
ifneq ($(findstring $(MAKEFLAGS), w),w)
|
||||
|
|
Loading…
Reference in New Issue