Merge branch 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Thomas Gleixner:
 "Another pile of fixes for perf:

   - Plug overflows and races in the core code

   - Sanitize the flow of the perf syscall so we error out before
     handling the more complex and hard to undo setups

   - Improve and fix Broadwell and Skylake hardware support

   - Revert a fix which broke what it tried to fix in perf tools

   - A couple of smaller fixes in various places of perf tools"

* 'perf-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf tools: Fix copying of /proc/kcore
  perf intel-pt: Remove no_force_psb from documentation
  perf probe: Use existing routine to look for a kernel module by dso->short_name
  perf/x86: Change test_aperfmperf() and test_intel() to static
  tools lib traceevent: Fix string handling in heterogeneous arch environments
  perf record: Avoid infinite loop at buildid processing with no samples
  perf: Fix races in computing the header sizes
  perf: Fix u16 overflows
  perf: Restructure perf syscall point of no return
  perf/x86/intel: Fix Skylake FRONTEND MSR extrareg mask
  perf/x86/intel/pebs: Add PEBS frontend profiling for Skylake
  perf/x86/intel: Make the CYCLE_ACTIVITY.* constraint on Broadwell more specific
  perf tools: Bool functions shouldn't return -1
  tools build: Add test for presence of __get_cpuid() gcc builtin
  tools build: Add test for presence of numa_num_possible_cpus() in libnuma
  Revert "perf symbols: Fix mismatched declarations for elf_getphdrnum"
  perf stat: Fix per-pkg event reporting bug
This commit is contained in:
Linus Torvalds 2015-09-27 12:51:39 -04:00
commit e3be4266d3
18 changed files with 213 additions and 95 deletions

View File

@ -141,6 +141,8 @@
#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10)
#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11)
#define MSR_PEBS_FRONTEND 0x000003f7
#define MSR_IA32_POWER_CTL 0x000001fc
#define MSR_IA32_MC0_CTL 0x00000400

View File

@ -47,6 +47,7 @@ enum extra_reg_type {
EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */
EXTRA_REG_LBR = 2, /* lbr_select */
EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */
EXTRA_REG_FE = 4, /* fe_* */
EXTRA_REG_MAX /* number of entries needed */
};

View File

@ -205,6 +205,11 @@ static struct extra_reg intel_skl_extra_regs[] __read_mostly = {
INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff8fffull, RSP_0),
INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff8fffull, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
/*
* Note that the low 8 bits of the eventsel code are not a contiguous field:
* some of those bits would #GP if set. These are masked out.
*/
INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
EVENT_EXTRA_END
};
@ -250,7 +255,7 @@ struct event_constraint intel_bdw_event_constraints[] = {
FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */
INTEL_EVENT_CONSTRAINT(0xa3, 0x4), /* CYCLE_ACTIVITY.* */
INTEL_UEVENT_CONSTRAINT(0x8a3, 0x4), /* CYCLE_ACTIVITY.CYCLES_L1D_MISS */
EVENT_CONSTRAINT_END
};
@ -2891,6 +2896,8 @@ PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
PMU_FORMAT_ATTR(ldlat, "config1:0-15");
PMU_FORMAT_ATTR(frontend, "config1:0-23");
static struct attribute *intel_arch3_formats_attr[] = {
&format_attr_event.attr,
&format_attr_umask.attr,
@ -2907,6 +2914,11 @@ static struct attribute *intel_arch3_formats_attr[] = {
NULL,
};
/*
 * Additional PMU format attributes used on the Skylake init path: only the
 * "frontend" field (declared above as config1:0-23). The list is
 * NULL-terminated and is combined with intel_arch3_formats_attr through
 * merge_attr() in intel_pmu_init().
 */
static struct attribute *skl_format_attr[] = {
&format_attr_frontend.attr,
NULL,
};
static __initconst const struct x86_pmu core_pmu = {
.name = "core",
.handle_irq = x86_pmu_handle_irq,
@ -3516,7 +3528,8 @@ __init int intel_pmu_init(void)
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
x86_pmu.cpu_events = hsw_events_attrs;
x86_pmu.format_attrs = merge_attr(intel_arch3_formats_attr,
skl_format_attr);
WARN_ON(!x86_pmu.format_attrs);
x86_pmu.cpu_events = hsw_events_attrs;
pr_cont("Skylake events, ");

View File

@ -10,12 +10,12 @@ enum perf_msr_id {
PERF_MSR_EVENT_MAX,
};
bool test_aperfmperf(int idx)
static bool test_aperfmperf(int idx)
{
return boot_cpu_has(X86_FEATURE_APERFMPERF);
}
bool test_intel(int idx)
static bool test_intel(int idx)
{
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
boot_cpu_data.x86 != 6)

View File

@ -1243,11 +1243,7 @@ static inline void perf_event__state_init(struct perf_event *event)
PERF_EVENT_STATE_INACTIVE;
}
/*
* Called at perf_event creation and when events are attached/detached from a
* group.
*/
static void perf_event__read_size(struct perf_event *event)
static void __perf_event_read_size(struct perf_event *event, int nr_siblings)
{
int entry = sizeof(u64); /* value */
int size = 0;
@ -1263,7 +1259,7 @@ static void perf_event__read_size(struct perf_event *event)
entry += sizeof(u64);
if (event->attr.read_format & PERF_FORMAT_GROUP) {
nr += event->group_leader->nr_siblings;
nr += nr_siblings;
size += sizeof(u64);
}
@ -1271,14 +1267,11 @@ static void perf_event__read_size(struct perf_event *event)
event->read_size = size;
}
static void perf_event__header_size(struct perf_event *event)
static void __perf_event_header_size(struct perf_event *event, u64 sample_type)
{
struct perf_sample_data *data;
u64 sample_type = event->attr.sample_type;
u16 size = 0;
perf_event__read_size(event);
if (sample_type & PERF_SAMPLE_IP)
size += sizeof(data->ip);
@ -1303,6 +1296,17 @@ static void perf_event__header_size(struct perf_event *event)
event->header_size = size;
}
/*
 * Refresh the cached read size and sample header size of @event from its
 * current group state.
 *
 * Called at perf_event creation and whenever events are attached to or
 * detached from a group.
 */
static void perf_event__header_size(struct perf_event *event)
{
	struct perf_event *leader = event->group_leader;

	/* The read size depends on how many siblings the group leader has. */
	__perf_event_read_size(event, leader->nr_siblings);

	/* The sample header size depends only on the event's own sample_type. */
	__perf_event_header_size(event, event->attr.sample_type);
}
static void perf_event__id_header_size(struct perf_event *event)
{
struct perf_sample_data *data;
@ -1330,6 +1334,27 @@ static void perf_event__id_header_size(struct perf_event *event)
event->id_header_size = size;
}
/*
 * Check whether the records generated for @event would stay below the
 * size bound if the event were added to its group.
 *
 * Returns true when the combined size is acceptable, false otherwise.
 */
static bool perf_event_validate_size(struct perf_event *event)
{
	/*
	 * Compute the sizes as if the group already had one more sibling.
	 * These are provisional: the values computed here will be
	 * over-written when we actually attach the event.
	 */
	__perf_event_read_size(event, event->group_leader->nr_siblings + 1);
	__perf_event_header_size(event,
				 event->attr.sample_type & ~PERF_SAMPLE_READ);
	perf_event__id_header_size(event);

	/*
	 * Sum the lot; should not exceed the 64k limit we have on records.
	 * The 16k bound is deliberately conservative to leave room for
	 * callchains and other variable-sized fields.
	 */
	return event->read_size + event->header_size +
	       event->id_header_size + sizeof(struct perf_event_header) < 16*1024;
}
static void perf_group_attach(struct perf_event *event)
{
struct perf_event *group_leader = event->group_leader, *pos;
@ -8297,13 +8322,35 @@ SYSCALL_DEFINE5(perf_event_open,
if (move_group) {
gctx = group_leader->ctx;
mutex_lock_double(&gctx->mutex, &ctx->mutex);
} else {
mutex_lock(&ctx->mutex);
}
if (!perf_event_validate_size(event)) {
err = -E2BIG;
goto err_locked;
}
/*
* Must be under the same ctx::mutex as perf_install_in_context(),
* because we need to serialize with concurrent event creation.
*/
if (!exclusive_event_installable(event, ctx)) {
/* exclusive and group stuff are assumed mutually exclusive */
WARN_ON_ONCE(move_group);
err = -EBUSY;
goto err_locked;
}
WARN_ON_ONCE(ctx->parent_ctx);
if (move_group) {
/*
* See perf_event_ctx_lock() for comments on the details
* of swizzling perf_event::ctx.
*/
mutex_lock_double(&gctx->mutex, &ctx->mutex);
perf_remove_from_context(group_leader, false);
list_for_each_entry(sibling, &group_leader->sibling_list,
@ -8311,13 +8358,7 @@ SYSCALL_DEFINE5(perf_event_open,
perf_remove_from_context(sibling, false);
put_ctx(gctx);
}
} else {
mutex_lock(&ctx->mutex);
}
WARN_ON_ONCE(ctx->parent_ctx);
if (move_group) {
/*
* Wait for everybody to stop referencing the events through
* the old lists, before installing it on new lists.
@ -8349,22 +8390,29 @@ SYSCALL_DEFINE5(perf_event_open,
perf_event__state_init(group_leader);
perf_install_in_context(ctx, group_leader, group_leader->cpu);
get_ctx(ctx);
/*
* Now that all events are installed in @ctx, nothing
* references @gctx anymore, so drop the last reference we have
* on it.
*/
put_ctx(gctx);
}
if (!exclusive_event_installable(event, ctx)) {
err = -EBUSY;
mutex_unlock(&ctx->mutex);
fput(event_file);
goto err_context;
}
/*
* Precalculate sample_data sizes; do while holding ctx::mutex such
* that we're serialized against further additions and before
* perf_install_in_context() which is the point the event is active and
* can use these values.
*/
perf_event__header_size(event);
perf_event__id_header_size(event);
perf_install_in_context(ctx, event, event->cpu);
perf_unpin_context(ctx);
if (move_group) {
if (move_group)
mutex_unlock(&gctx->mutex);
put_ctx(gctx);
}
mutex_unlock(&ctx->mutex);
put_online_cpus();
@ -8375,12 +8423,6 @@ SYSCALL_DEFINE5(perf_event_open,
list_add_tail(&event->owner_entry, &current->perf_event_list);
mutex_unlock(&current->perf_event_mutex);
/*
* Precalculate sample_data sizes
*/
perf_event__header_size(event);
perf_event__id_header_size(event);
/*
* Drop the reference on the group_event after placing the
* new event on the sibling_list. This ensures destruction
@ -8391,6 +8433,12 @@ SYSCALL_DEFINE5(perf_event_open,
fd_install(event_fd, event_file);
return event_fd;
err_locked:
if (move_group)
mutex_unlock(&gctx->mutex);
mutex_unlock(&ctx->mutex);
/* err_file: */
fput(event_file);
err_context:
perf_unpin_context(ctx);
put_ctx(ctx);

View File

@ -41,6 +41,7 @@ FEATURE_TESTS ?= \
libelf-getphdrnum \
libelf-mmap \
libnuma \
numa_num_possible_cpus \
libperl \
libpython \
libpython-version \
@ -51,7 +52,8 @@ FEATURE_TESTS ?= \
timerfd \
libdw-dwarf-unwind \
zlib \
lzma
lzma \
get_cpuid
FEATURE_DISPLAY ?= \
dwarf \
@ -61,13 +63,15 @@ FEATURE_DISPLAY ?= \
libbfd \
libelf \
libnuma \
numa_num_possible_cpus \
libperl \
libpython \
libslang \
libunwind \
libdw-dwarf-unwind \
zlib \
lzma
lzma \
get_cpuid
# Set FEATURE_CHECK_(C|LD)FLAGS-all for all FEATURE_TESTS features.
# If in the future we need per-feature checks/flags for features not

View File

@ -19,6 +19,7 @@ FILES= \
test-libelf-getphdrnum.bin \
test-libelf-mmap.bin \
test-libnuma.bin \
test-numa_num_possible_cpus.bin \
test-libperl.bin \
test-libpython.bin \
test-libpython-version.bin \
@ -34,7 +35,8 @@ FILES= \
test-compile-x32.bin \
test-zlib.bin \
test-lzma.bin \
test-bpf.bin
test-bpf.bin \
test-get_cpuid.bin
CC := $(CROSS_COMPILE)gcc -MD
PKG_CONFIG := $(CROSS_COMPILE)pkg-config
@ -87,6 +89,9 @@ test-libelf-getphdrnum.bin:
test-libnuma.bin:
$(BUILD) -lnuma
test-numa_num_possible_cpus.bin:
$(BUILD) -lnuma
test-libunwind.bin:
$(BUILD) -lelf
@ -162,6 +167,9 @@ test-zlib.bin:
test-lzma.bin:
$(BUILD) -llzma
test-get_cpuid.bin:
$(BUILD)
test-bpf.bin:
$(BUILD)

View File

@ -77,6 +77,10 @@
# include "test-libnuma.c"
#undef main
#define main main_test_numa_num_possible_cpus
# include "test-numa_num_possible_cpus.c"
#undef main
#define main main_test_timerfd
# include "test-timerfd.c"
#undef main
@ -117,6 +121,10 @@
# include "test-lzma.c"
#undef main
#define main main_test_get_cpuid
# include "test-get_cpuid.c"
#undef main
int main(int argc, char *argv[])
{
main_test_libpython();
@ -136,6 +144,7 @@ int main(int argc, char *argv[])
main_test_libbfd();
main_test_backtrace();
main_test_libnuma();
main_test_numa_num_possible_cpus();
main_test_timerfd();
main_test_stackprotector_all();
main_test_libdw_dwarf_unwind();
@ -143,6 +152,7 @@ int main(int argc, char *argv[])
main_test_zlib();
main_test_pthread_attr_setaffinity_np();
main_test_lzma();
main_test_get_cpuid();
return 0;
}

View File

@ -0,0 +1,7 @@
#include <cpuid.h>
/*
 * Build feature probe for the __get_cpuid() helper from <cpuid.h>.
 *
 * Queries CPUID leaf 0x15 into four zero-initialized registers and returns
 * __get_cpuid()'s result as the exit status.
 * NOTE(review): presumably only a successful compile/link matters to the
 * feature detection, not the exit status - confirm against the feature
 * Makefile.
 */
int main(void)
{
unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
return __get_cpuid(0x15, &eax, &ebx, &ecx, &edx);
}

View File

@ -0,0 +1,6 @@
#include <numa.h>
/*
 * Build feature probe for numa_num_possible_cpus() in libnuma.
 *
 * Simply calls the function and returns its result, so the program can only
 * be built when <numa.h> declares it and the library provides it.
 * NOTE(review): presumably only a successful compile/link matters to the
 * feature detection, not the exit status - confirm against the feature
 * Makefile.
 */
int main(void)
{
return numa_num_possible_cpus();
}

View File

@ -3795,7 +3795,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
struct format_field *field;
struct printk_map *printk;
long long val, fval;
unsigned long addr;
unsigned long long addr;
char *str;
unsigned char *hex;
int print;
@ -3828,13 +3828,30 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,
*/
if (!(field->flags & FIELD_IS_ARRAY) &&
field->size == pevent->long_size) {
addr = *(unsigned long *)(data + field->offset);
/* Handle heterogeneous recording and processing
* architectures
*
* CASE I:
* Traces recorded on 32-bit devices (32-bit
* addressing) and processed on 64-bit devices:
* In this case, only 32 bits should be read.
*
* CASE II:
* Traces recorded on 64 bit devices and processed
* on 32-bit devices:
* In this case, 64 bits must be read.
*/
addr = (pevent->long_size == 8) ?
*(unsigned long long *)(data + field->offset) :
(unsigned long long)*(unsigned int *)(data + field->offset);
/* Check if it matches a print format */
printk = find_printk(pevent, addr);
if (printk)
trace_seq_puts(s, printk->printk);
else
trace_seq_printf(s, "%lx", addr);
trace_seq_printf(s, "%llx", addr);
break;
}
str = malloc(len + 1);

View File

@ -364,21 +364,6 @@ cyc_thresh Specifies how frequently CYC packets are produced - see cyc
CYC packets are not requested by default.
no_force_psb This is a driver option and is not in the IA32_RTIT_CTL MSR.
It stops the driver resetting the byte count to zero whenever
enabling the trace (for example on context switches) which in
turn results in no PSB being forced. However some processors
will produce a PSB anyway.
In any case, there is still a PSB when the trace is enabled for
the first time.
no_force_psb can be used to slightly decrease the trace size but
may make it harder for the decoder to recover from errors.
no_force_psb is not selected by default.
new snapshot option
-------------------

View File

@ -572,12 +572,17 @@ ifndef NO_LIBNUMA
ifeq ($(feature-libnuma), 0)
msg := $(warning No numa.h found, disables 'perf bench numa mem' benchmark, please install numactl-devel/libnuma-devel/libnuma-dev);
NO_LIBNUMA := 1
else
ifeq ($(feature-numa_num_possible_cpus), 0)
msg := $(warning Old numa library found, disables 'perf bench numa mem' benchmark, please install numactl-devel/libnuma-devel/libnuma-dev >= 2.0.8);
NO_LIBNUMA := 1
else
CFLAGS += -DHAVE_LIBNUMA_SUPPORT
EXTLIBS += -lnuma
$(call detected,CONFIG_NUMA)
endif
endif
endif
ifdef HAVE_KVM_STAT_SUPPORT
CFLAGS += -DHAVE_KVM_STAT_SUPPORT
@ -621,9 +626,14 @@ ifdef LIBBABELTRACE
endif
ifndef NO_AUXTRACE
ifeq ($(feature-get_cpuid), 0)
msg := $(warning Your gcc lacks the __get_cpuid() builtin, disables support for auxtrace/Intel PT, please install a newer gcc);
NO_AUXTRACE := 1
else
$(call detected,CONFIG_AUXTRACE)
CFLAGS += -DHAVE_AUXTRACE_SUPPORT
endif
endif
# Among the variables below, these:
# perfexecdir

View File

@ -270,11 +270,12 @@ static int kernel_get_module_dso(const char *module, struct dso **pdso)
int ret = 0;
if (module) {
list_for_each_entry(dso, &host_machine->dsos.head, node) {
if (!dso->kernel)
continue;
if (strncmp(dso->short_name + 1, module,
dso->short_name_len - 2) == 0)
char module_name[128];
snprintf(module_name, sizeof(module_name), "[%s]", module);
map = map_groups__find_by_name(&host_machine->kmaps, MAP__FUNCTION, module_name);
if (map) {
dso = map->dso;
goto found;
}
pr_debug("Failed to find module %s.\n", module);

View File

@ -1580,7 +1580,10 @@ static int __perf_session__process_events(struct perf_session *session,
file_offset = page_offset;
head = data_offset - page_offset;
if (data_size && (data_offset + data_size < file_size))
if (data_size == 0)
goto out;
if (data_offset + data_size < file_size)
file_size = data_offset + data_size;
ui_progress__init(&prog, file_size, "Processing events...");

View File

@ -196,7 +196,8 @@ static void zero_per_pkg(struct perf_evsel *counter)
memset(counter->per_pkg_mask, 0, MAX_NR_CPUS);
}
static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip)
static int check_per_pkg(struct perf_evsel *counter,
struct perf_counts_values *vals, int cpu, bool *skip)
{
unsigned long *mask = counter->per_pkg_mask;
struct cpu_map *cpus = perf_evsel__cpus(counter);
@ -218,6 +219,17 @@ static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip)
counter->per_pkg_mask = mask;
}
/*
* we do not consider an event that has not run as a good
* instance to mark a package as used (skip=1). Otherwise
* we may run into a situation where the first CPU in a package
* is not running anything, yet the second is, and this function
* would mark the package as used after the first CPU and would
* not read the values from the second CPU.
*/
if (!(vals->run && vals->ena))
return 0;
s = cpu_map__get_socket(cpus, cpu);
if (s < 0)
return -1;
@ -235,7 +247,7 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel
static struct perf_counts_values zero;
bool skip = false;
if (check_per_pkg(evsel, cpu, &skip)) {
if (check_per_pkg(evsel, count, cpu, &skip)) {
pr_err("failed to read per-pkg counter\n");
return -1;
}

View File

@ -38,7 +38,7 @@ static inline char *bfd_demangle(void __maybe_unused *v,
#endif
#ifndef HAVE_ELF_GETPHDRNUM_SUPPORT
int elf_getphdrnum(Elf *elf, size_t *dst)
static int elf_getphdrnum(Elf *elf, size_t *dst)
{
GElf_Ehdr gehdr;
GElf_Ehdr *ehdr;
@ -1271,8 +1271,6 @@ out_close:
static int kcore__init(struct kcore *kcore, char *filename, int elfclass,
bool temp)
{
GElf_Ehdr *ehdr;
kcore->elfclass = elfclass;
if (temp)
@ -1289,9 +1287,7 @@ static int kcore__init(struct kcore *kcore, char *filename, int elfclass,
if (!gelf_newehdr(kcore->elf, elfclass))
goto out_end;
ehdr = gelf_getehdr(kcore->elf, &kcore->ehdr);
if (!ehdr)
goto out_end;
memset(&kcore->ehdr, 0, sizeof(GElf_Ehdr));
return 0;
@ -1348,23 +1344,18 @@ static int kcore__copy_hdr(struct kcore *from, struct kcore *to, size_t count)
static int kcore__add_phdr(struct kcore *kcore, int idx, off_t offset,
u64 addr, u64 len)
{
GElf_Phdr gphdr;
GElf_Phdr *phdr;
GElf_Phdr phdr = {
.p_type = PT_LOAD,
.p_flags = PF_R | PF_W | PF_X,
.p_offset = offset,
.p_vaddr = addr,
.p_paddr = 0,
.p_filesz = len,
.p_memsz = len,
.p_align = page_size,
};
phdr = gelf_getphdr(kcore->elf, idx, &gphdr);
if (!phdr)
return -1;
phdr->p_type = PT_LOAD;
phdr->p_flags = PF_R | PF_W | PF_X;
phdr->p_offset = offset;
phdr->p_vaddr = addr;
phdr->p_paddr = 0;
phdr->p_filesz = len;
phdr->p_memsz = len;
phdr->p_align = page_size;
if (!gelf_update_phdr(kcore->elf, idx, phdr))
if (!gelf_update_phdr(kcore->elf, idx, &phdr))
return -1;
return 0;

View File

@ -709,7 +709,7 @@ bool find_process(const char *name)
dir = opendir(procfs__mountpoint());
if (!dir)
return -1;
return false;
/* Walk through the directory. */
while (ret && (d = readdir(dir)) != NULL) {