Merge branch 'perf/x86' into perf/core, because it's ready
Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
commit
936c663aed
|
@ -648,7 +648,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
|
|||
* Per-cpu breakpoints are not supported by our stepping
|
||||
* mechanism.
|
||||
*/
|
||||
if (!bp->hw.bp_target)
|
||||
if (!bp->hw.target)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
|
|
|
@ -527,7 +527,7 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
|
|||
* Disallow per-task kernel breakpoints since these would
|
||||
* complicate the stepping code.
|
||||
*/
|
||||
if (info->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.bp_target)
|
||||
if (info->ctrl.privilege == AARCH64_BREAKPOINT_EL1 && bp->hw.target)
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
#include <asm/disabled-features.h>
|
||||
#endif
|
||||
|
||||
#define NCAPINTS 11 /* N 32-bit words worth of info */
|
||||
#define NCAPINTS 13 /* N 32-bit words worth of info */
|
||||
#define NBUGINTS 1 /* N 32-bit bug flags */
|
||||
|
||||
/*
|
||||
|
@ -226,6 +226,7 @@
|
|||
#define X86_FEATURE_ERMS ( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
|
||||
#define X86_FEATURE_INVPCID ( 9*32+10) /* Invalidate Processor Context ID */
|
||||
#define X86_FEATURE_RTM ( 9*32+11) /* Restricted Transactional Memory */
|
||||
#define X86_FEATURE_CQM ( 9*32+12) /* Cache QoS Monitoring */
|
||||
#define X86_FEATURE_MPX ( 9*32+14) /* Memory Protection Extension */
|
||||
#define X86_FEATURE_AVX512F ( 9*32+16) /* AVX-512 Foundation */
|
||||
#define X86_FEATURE_RDSEED ( 9*32+18) /* The RDSEED instruction */
|
||||
|
@ -242,6 +243,12 @@
|
|||
#define X86_FEATURE_XGETBV1 (10*32+ 2) /* XGETBV with ECX = 1 */
|
||||
#define X86_FEATURE_XSAVES (10*32+ 3) /* XSAVES/XRSTORS */
|
||||
|
||||
/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
|
||||
#define X86_FEATURE_CQM_LLC (11*32+ 1) /* LLC QoS if 1 */
|
||||
|
||||
/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
|
||||
#define X86_FEATURE_CQM_OCCUP_LLC (12*32+ 0) /* LLC occupancy monitoring if 1 */
|
||||
|
||||
/*
|
||||
* BUG word(s)
|
||||
*/
|
||||
|
|
|
@ -109,6 +109,9 @@ struct cpuinfo_x86 {
|
|||
/* in KB - valid for CPUS which support this call: */
|
||||
int x86_cache_size;
|
||||
int x86_cache_alignment; /* In bytes */
|
||||
/* Cache QoS architectural values: */
|
||||
int x86_cache_max_rmid; /* max index */
|
||||
int x86_cache_occ_scale; /* scale to bytes */
|
||||
int x86_power;
|
||||
unsigned long loops_per_jiffy;
|
||||
/* cpuid returned max cores value: */
|
||||
|
|
|
@ -39,7 +39,7 @@ obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o
|
|||
endif
|
||||
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o
|
||||
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o
|
||||
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o
|
||||
obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_rapl.o perf_event_intel_cqm.o
|
||||
|
||||
obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += perf_event_intel_uncore.o \
|
||||
perf_event_intel_uncore_snb.o \
|
||||
|
|
|
@ -646,6 +646,30 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
|
|||
c->x86_capability[10] = eax;
|
||||
}
|
||||
|
||||
/* Additional Intel-defined flags: level 0x0000000F */
|
||||
if (c->cpuid_level >= 0x0000000F) {
|
||||
u32 eax, ebx, ecx, edx;
|
||||
|
||||
/* QoS sub-leaf, EAX=0Fh, ECX=0 */
|
||||
cpuid_count(0x0000000F, 0, &eax, &ebx, &ecx, &edx);
|
||||
c->x86_capability[11] = edx;
|
||||
if (cpu_has(c, X86_FEATURE_CQM_LLC)) {
|
||||
/* will be overridden if occupancy monitoring exists */
|
||||
c->x86_cache_max_rmid = ebx;
|
||||
|
||||
/* QoS sub-leaf, EAX=0Fh, ECX=1 */
|
||||
cpuid_count(0x0000000F, 1, &eax, &ebx, &ecx, &edx);
|
||||
c->x86_capability[12] = edx;
|
||||
if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC)) {
|
||||
c->x86_cache_max_rmid = ecx;
|
||||
c->x86_cache_occ_scale = ebx;
|
||||
}
|
||||
} else {
|
||||
c->x86_cache_max_rmid = -1;
|
||||
c->x86_cache_occ_scale = -1;
|
||||
}
|
||||
}
|
||||
|
||||
/* AMD-defined flags: level 0x80000001 */
|
||||
xlvl = cpuid_eax(0x80000000);
|
||||
c->extended_cpuid_level = xlvl;
|
||||
|
@ -834,6 +858,20 @@ static void generic_identify(struct cpuinfo_x86 *c)
|
|||
detect_nopl(c);
|
||||
}
|
||||
|
||||
static void x86_init_cache_qos(struct cpuinfo_x86 *c)
|
||||
{
|
||||
/*
|
||||
* The heavy lifting of max_rmid and cache_occ_scale is handled
|
||||
* in get_cpu_cap(). Here we only clamp boot_cpu_data's max_rmid to
|
||||
* the smallest value seen on any other CPU, in case that CPU lacks CQM.
|
||||
*/
|
||||
if (c != &boot_cpu_data) {
|
||||
boot_cpu_data.x86_cache_max_rmid =
|
||||
min(boot_cpu_data.x86_cache_max_rmid,
|
||||
c->x86_cache_max_rmid);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This does the hard work of actually picking apart the CPU stuff...
|
||||
*/
|
||||
|
@ -923,6 +961,7 @@ static void identify_cpu(struct cpuinfo_x86 *c)
|
|||
|
||||
init_hypervisor(c);
|
||||
x86_init_rdrand(c);
|
||||
x86_init_cache_qos(c);
|
||||
|
||||
/*
|
||||
* Clear/Set all flags overridden by options, need do it
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -53,6 +53,7 @@ struct perf_guest_info_callbacks {
|
|||
#include <linux/sysfs.h>
|
||||
#include <linux/perf_regs.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/cgroup.h>
|
||||
#include <asm/local.h>
|
||||
|
||||
struct perf_callchain_entry {
|
||||
|
@ -118,10 +119,16 @@ struct hw_perf_event {
|
|||
struct hrtimer hrtimer;
|
||||
};
|
||||
struct { /* tracepoint */
|
||||
struct task_struct *tp_target;
|
||||
/* for tp_event->class */
|
||||
struct list_head tp_list;
|
||||
};
|
||||
struct { /* intel_cqm */
|
||||
int cqm_state;
|
||||
int cqm_rmid;
|
||||
struct list_head cqm_events_entry;
|
||||
struct list_head cqm_groups_entry;
|
||||
struct list_head cqm_group_entry;
|
||||
};
|
||||
#ifdef CONFIG_HAVE_HW_BREAKPOINT
|
||||
struct { /* breakpoint */
|
||||
/*
|
||||
|
@ -129,12 +136,12 @@ struct hw_perf_event {
|
|||
* problem hw_breakpoint has with context
|
||||
* creation and event initialization.
|
||||
*/
|
||||
struct task_struct *bp_target;
|
||||
struct arch_hw_breakpoint info;
|
||||
struct list_head bp_list;
|
||||
};
|
||||
#endif
|
||||
};
|
||||
struct task_struct *target;
|
||||
int state;
|
||||
local64_t prev_count;
|
||||
u64 sample_period;
|
||||
|
@ -271,6 +278,11 @@ struct pmu {
|
|||
*/
|
||||
size_t task_ctx_size;
|
||||
|
||||
|
||||
/*
|
||||
* Return the count value for a counter.
|
||||
*/
|
||||
u64 (*count) (struct perf_event *event); /*optional*/
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -547,6 +559,35 @@ struct perf_output_handle {
|
|||
int page;
|
||||
};
|
||||
|
||||
#ifdef CONFIG_CGROUP_PERF
|
||||
|
||||
/*
|
||||
* perf_cgroup_info keeps track of time_enabled for a cgroup.
|
||||
* This is a per-cpu dynamically allocated data structure.
|
||||
*/
|
||||
struct perf_cgroup_info {
|
||||
u64 time;
|
||||
u64 timestamp;
|
||||
};
|
||||
|
||||
struct perf_cgroup {
|
||||
struct cgroup_subsys_state css;
|
||||
struct perf_cgroup_info __percpu *info;
|
||||
};
|
||||
|
||||
/*
|
||||
* Must ensure cgroup is pinned (css_get) before calling
|
||||
* this function. In other words, we cannot call this function
|
||||
* if there is no cgroup event for the current CPU context.
|
||||
*/
|
||||
static inline struct perf_cgroup *
|
||||
perf_cgroup_from_task(struct task_struct *task)
|
||||
{
|
||||
return container_of(task_css(task, perf_event_cgrp_id),
|
||||
struct perf_cgroup, css);
|
||||
}
|
||||
#endif /* CONFIG_CGROUP_PERF */
|
||||
|
||||
#ifdef CONFIG_PERF_EVENTS
|
||||
|
||||
extern int perf_pmu_register(struct pmu *pmu, const char *name, int type);
|
||||
|
@ -740,6 +781,11 @@ static inline void perf_event_task_sched_out(struct task_struct *prev,
|
|||
__perf_event_task_sched_out(prev, next);
|
||||
}
|
||||
|
||||
static inline u64 __perf_event_count(struct perf_event *event)
|
||||
{
|
||||
return local64_read(&event->count) + atomic64_read(&event->child_count);
|
||||
}
|
||||
|
||||
extern void perf_event_mmap(struct vm_area_struct *vma);
|
||||
extern struct perf_guest_info_callbacks *perf_guest_cbs;
|
||||
extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
|
||||
|
|
|
@ -34,11 +34,11 @@
|
|||
#include <linux/syscalls.h>
|
||||
#include <linux/anon_inodes.h>
|
||||
#include <linux/kernel_stat.h>
|
||||
#include <linux/cgroup.h>
|
||||
#include <linux/perf_event.h>
|
||||
#include <linux/ftrace_event.h>
|
||||
#include <linux/hw_breakpoint.h>
|
||||
#include <linux/mm_types.h>
|
||||
#include <linux/cgroup.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/mman.h>
|
||||
#include <linux/compat.h>
|
||||
|
@ -351,32 +351,6 @@ static void perf_ctx_unlock(struct perf_cpu_context *cpuctx,
|
|||
|
||||
#ifdef CONFIG_CGROUP_PERF
|
||||
|
||||
/*
|
||||
* perf_cgroup_info keeps track of time_enabled for a cgroup.
|
||||
* This is a per-cpu dynamically allocated data structure.
|
||||
*/
|
||||
struct perf_cgroup_info {
|
||||
u64 time;
|
||||
u64 timestamp;
|
||||
};
|
||||
|
||||
struct perf_cgroup {
|
||||
struct cgroup_subsys_state css;
|
||||
struct perf_cgroup_info __percpu *info;
|
||||
};
|
||||
|
||||
/*
|
||||
* Must ensure cgroup is pinned (css_get) before calling
|
||||
* this function. In other words, we cannot call this function
|
||||
* if there is no cgroup event for the current CPU context.
|
||||
*/
|
||||
static inline struct perf_cgroup *
|
||||
perf_cgroup_from_task(struct task_struct *task)
|
||||
{
|
||||
return container_of(task_css(task, perf_event_cgrp_id),
|
||||
struct perf_cgroup, css);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
perf_cgroup_match(struct perf_event *event)
|
||||
{
|
||||
|
@ -3220,7 +3194,10 @@ static void __perf_event_read(void *info)
|
|||
|
||||
static inline u64 perf_event_count(struct perf_event *event)
|
||||
{
|
||||
return local64_read(&event->count) + atomic64_read(&event->child_count);
|
||||
if (event->pmu->count)
|
||||
return event->pmu->count(event);
|
||||
|
||||
return __perf_event_count(event);
|
||||
}
|
||||
|
||||
static u64 perf_event_read(struct perf_event *event)
|
||||
|
@ -7149,7 +7126,7 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
|
|||
struct perf_event *group_leader,
|
||||
struct perf_event *parent_event,
|
||||
perf_overflow_handler_t overflow_handler,
|
||||
void *context)
|
||||
void *context, int cgroup_fd)
|
||||
{
|
||||
struct pmu *pmu;
|
||||
struct perf_event *event;
|
||||
|
@ -7204,16 +7181,12 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
|
|||
|
||||
if (task) {
|
||||
event->attach_state = PERF_ATTACH_TASK;
|
||||
|
||||
if (attr->type == PERF_TYPE_TRACEPOINT)
|
||||
event->hw.tp_target = task;
|
||||
#ifdef CONFIG_HAVE_HW_BREAKPOINT
|
||||
/*
|
||||
* hw_breakpoint is a bit difficult here..
|
||||
* XXX pmu::event_init needs to know what task to account to
|
||||
* and we cannot use the ctx information because we need the
|
||||
* pmu before we get a ctx.
|
||||
*/
|
||||
else if (attr->type == PERF_TYPE_BREAKPOINT)
|
||||
event->hw.bp_target = task;
|
||||
#endif
|
||||
event->hw.target = task;
|
||||
}
|
||||
|
||||
if (!overflow_handler && parent_event) {
|
||||
|
@ -7245,6 +7218,12 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
|
|||
if (!has_branch_stack(event))
|
||||
event->attr.branch_sample_type = 0;
|
||||
|
||||
if (cgroup_fd != -1) {
|
||||
err = perf_cgroup_connect(cgroup_fd, event, attr, group_leader);
|
||||
if (err)
|
||||
goto err_ns;
|
||||
}
|
||||
|
||||
pmu = perf_init_event(event);
|
||||
if (!pmu)
|
||||
goto err_ns;
|
||||
|
@ -7268,6 +7247,8 @@ err_pmu:
|
|||
event->destroy(event);
|
||||
module_put(pmu->module);
|
||||
err_ns:
|
||||
if (is_cgroup_event(event))
|
||||
perf_detach_cgroup(event);
|
||||
if (event->ns)
|
||||
put_pid_ns(event->ns);
|
||||
kfree(event);
|
||||
|
@ -7486,6 +7467,7 @@ SYSCALL_DEFINE5(perf_event_open,
|
|||
int move_group = 0;
|
||||
int err;
|
||||
int f_flags = O_RDWR;
|
||||
int cgroup_fd = -1;
|
||||
|
||||
/* for future expandability... */
|
||||
if (flags & ~PERF_FLAG_ALL)
|
||||
|
@ -7551,21 +7533,16 @@ SYSCALL_DEFINE5(perf_event_open,
|
|||
|
||||
get_online_cpus();
|
||||
|
||||
if (flags & PERF_FLAG_PID_CGROUP)
|
||||
cgroup_fd = pid;
|
||||
|
||||
event = perf_event_alloc(&attr, cpu, task, group_leader, NULL,
|
||||
NULL, NULL);
|
||||
NULL, NULL, cgroup_fd);
|
||||
if (IS_ERR(event)) {
|
||||
err = PTR_ERR(event);
|
||||
goto err_cpus;
|
||||
}
|
||||
|
||||
if (flags & PERF_FLAG_PID_CGROUP) {
|
||||
err = perf_cgroup_connect(pid, event, &attr, group_leader);
|
||||
if (err) {
|
||||
__free_event(event);
|
||||
goto err_cpus;
|
||||
}
|
||||
}
|
||||
|
||||
if (is_sampling_event(event)) {
|
||||
if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) {
|
||||
err = -ENOTSUPP;
|
||||
|
@ -7802,7 +7779,7 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
|
|||
*/
|
||||
|
||||
event = perf_event_alloc(attr, cpu, task, NULL, NULL,
|
||||
overflow_handler, context);
|
||||
overflow_handler, context, -1);
|
||||
if (IS_ERR(event)) {
|
||||
err = PTR_ERR(event);
|
||||
goto err;
|
||||
|
@ -8163,7 +8140,7 @@ inherit_event(struct perf_event *parent_event,
|
|||
parent_event->cpu,
|
||||
child,
|
||||
group_leader, parent_event,
|
||||
NULL, NULL);
|
||||
NULL, NULL, -1);
|
||||
if (IS_ERR(child_event))
|
||||
return child_event;
|
||||
|
||||
|
|
|
@ -116,12 +116,12 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
|
|||
*/
|
||||
static int task_bp_pinned(int cpu, struct perf_event *bp, enum bp_type_idx type)
|
||||
{
|
||||
struct task_struct *tsk = bp->hw.bp_target;
|
||||
struct task_struct *tsk = bp->hw.target;
|
||||
struct perf_event *iter;
|
||||
int count = 0;
|
||||
|
||||
list_for_each_entry(iter, &bp_task_head, hw.bp_list) {
|
||||
if (iter->hw.bp_target == tsk &&
|
||||
if (iter->hw.target == tsk &&
|
||||
find_slot_idx(iter) == type &&
|
||||
(iter->cpu < 0 || cpu == iter->cpu))
|
||||
count += hw_breakpoint_weight(iter);
|
||||
|
@ -153,7 +153,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
|
|||
int nr;
|
||||
|
||||
nr = info->cpu_pinned;
|
||||
if (!bp->hw.bp_target)
|
||||
if (!bp->hw.target)
|
||||
nr += max_task_bp_pinned(cpu, type);
|
||||
else
|
||||
nr += task_bp_pinned(cpu, bp, type);
|
||||
|
@ -210,7 +210,7 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
|
|||
weight = -weight;
|
||||
|
||||
/* Pinned counter cpu profiling */
|
||||
if (!bp->hw.bp_target) {
|
||||
if (!bp->hw.target) {
|
||||
get_bp_info(bp->cpu, type)->cpu_pinned += weight;
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -1005,7 +1005,7 @@ __uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm)
|
|||
return true;
|
||||
|
||||
list_for_each_entry(event, &filter->perf_events, hw.tp_list) {
|
||||
if (event->hw.tp_target->mm == mm)
|
||||
if (event->hw.target->mm == mm)
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -1015,7 +1015,7 @@ __uprobe_perf_filter(struct trace_uprobe_filter *filter, struct mm_struct *mm)
|
|||
static inline bool
|
||||
uprobe_filter_event(struct trace_uprobe *tu, struct perf_event *event)
|
||||
{
|
||||
return __uprobe_perf_filter(&tu->filter, event->hw.tp_target->mm);
|
||||
return __uprobe_perf_filter(&tu->filter, event->hw.target->mm);
|
||||
}
|
||||
|
||||
static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
|
||||
|
@ -1023,10 +1023,10 @@ static int uprobe_perf_close(struct trace_uprobe *tu, struct perf_event *event)
|
|||
bool done;
|
||||
|
||||
write_lock(&tu->filter.rwlock);
|
||||
if (event->hw.tp_target) {
|
||||
if (event->hw.target) {
|
||||
list_del(&event->hw.tp_list);
|
||||
done = tu->filter.nr_systemwide ||
|
||||
(event->hw.tp_target->flags & PF_EXITING) ||
|
||||
(event->hw.target->flags & PF_EXITING) ||
|
||||
uprobe_filter_event(tu, event);
|
||||
} else {
|
||||
tu->filter.nr_systemwide--;
|
||||
|
@ -1046,7 +1046,7 @@ static int uprobe_perf_open(struct trace_uprobe *tu, struct perf_event *event)
|
|||
int err;
|
||||
|
||||
write_lock(&tu->filter.rwlock);
|
||||
if (event->hw.tp_target) {
|
||||
if (event->hw.target) {
|
||||
/*
|
||||
* event->parent != NULL means copy_process(), we can avoid
|
||||
* uprobe_apply(). current->mm must be probed and we can rely
|
||||
|
|
Loading…
Reference in New Issue