Merge tag 'perf-urgent-2021-08-08' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull perf fixes from Thomas Gleixner:
 "A set of perf fixes:

   - Correct the permission checks for perf events which send SIGTRAP
     to a different process, and clean up that code to be more
     readable.

   - Prevent an out-of-bound MSR access in the x86 perf code which
     happened due to an incomplete limiting to the actually available
     hardware counters.

   - Prevent access to the AMD64_EVENTSEL_HOSTONLY bit when running
     inside a guest.

   - Handle small-core counter re-enabling correctly by issuing an ACK
     right before re-enabling it to prevent a stale PEBS record being
     kept around"

* tag 'perf-urgent-2021-08-08' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  perf/x86/intel: Apply mid ACK for small core
  perf/x86/amd: Don't touch the AMD64_EVENTSEL_HOSTONLY bit inside the guest
  perf/x86: Fix out of bound MSR access
  perf: Refactor permissions check into perf_check_permission()
  perf: Fix required permissions if sigtrap is requested
commit 74eedeba45
arch/x86/events/core.c

@@ -2489,13 +2489,15 @@ void perf_clear_dirty_counters(void)
                 return;
 
         for_each_set_bit(i, cpuc->dirty, X86_PMC_IDX_MAX) {
-                /* Metrics and fake events don't have corresponding HW counters. */
-                if (is_metric_idx(i) || (i == INTEL_PMC_IDX_FIXED_VLBR))
-                        continue;
-                else if (i >= INTEL_PMC_IDX_FIXED)
+                if (i >= INTEL_PMC_IDX_FIXED) {
+                        /* Metrics and fake events don't have corresponding HW counters. */
+                        if ((i - INTEL_PMC_IDX_FIXED) >= hybrid(cpuc->pmu, num_counters_fixed))
+                                continue;
+
                         wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + (i - INTEL_PMC_IDX_FIXED), 0);
-                else
+                } else {
                         wrmsrl(x86_pmu_event_addr(i), 0);
+                }
         }
 
         bitmap_zero(cpuc->dirty, X86_PMC_IDX_MAX);
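The bound check added here is the actual fix: a dirty index at or beyond the implemented fixed counters must not be translated into a fixed-counter MSR address, otherwise the write targets an MSR that does not exist on that PMU. Below is a minimal, self-contained userspace model of that logic, not kernel code: the constants mirror the kernel's values, num_counters_fixed stands in for hybrid(cpuc->pmu, num_counters_fixed), and the MSR write is replaced by a printf.

/* Standalone model of the bound check above; compiles with any C compiler. */
#include <stdio.h>

#define INTEL_PMC_IDX_FIXED          32      /* first fixed-counter index */
#define MSR_ARCH_PERFMON_FIXED_CTR0  0x309   /* MSR of fixed counter 0 */

static unsigned int num_counters_fixed = 3;  /* stand-in for the per-PMU value */

static void clear_dirty_counter(unsigned int idx)
{
        if (idx >= INTEL_PMC_IDX_FIXED) {
                /* Metrics and fake events don't have corresponding HW counters:
                 * without this check the MSR address below would be computed
                 * for a counter that is not implemented. */
                if (idx - INTEL_PMC_IDX_FIXED >= num_counters_fixed) {
                        printf("idx %u: no HW counter, skipped\n", idx);
                        return;
                }
                printf("idx %u: wrmsrl(0x%x, 0)\n",
                       idx, MSR_ARCH_PERFMON_FIXED_CTR0 + (idx - INTEL_PMC_IDX_FIXED));
        } else {
                printf("idx %u: clear general-purpose counter %u\n", idx, idx);
        }
}

int main(void)
{
        /* 0: a GP counter, 33: fixed counter 1, the rest model indices that
         * have no backing HW counter (too high / metrics / fake events). */
        unsigned int dirty[] = { 0, 33, 35, 47, 48 };

        for (unsigned int i = 0; i < sizeof(dirty) / sizeof(dirty[0]); i++)
                clear_dirty_counter(dirty[i]);
        return 0;
}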
arch/x86/events/intel/core.c

@@ -2904,24 +2904,28 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status)
  */
 static int intel_pmu_handle_irq(struct pt_regs *regs)
 {
-        struct cpu_hw_events *cpuc;
+        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+        bool late_ack = hybrid_bit(cpuc->pmu, late_ack);
+        bool mid_ack = hybrid_bit(cpuc->pmu, mid_ack);
         int loops;
         u64 status;
         int handled;
         int pmu_enabled;
 
-        cpuc = this_cpu_ptr(&cpu_hw_events);
-
         /*
          * Save the PMU state.
          * It needs to be restored when leaving the handler.
          */
         pmu_enabled = cpuc->enabled;
         /*
-         * No known reason to not always do late ACK,
-         * but just in case do it opt-in.
+         * In general, the early ACK is only applied for old platforms.
+         * For the big core starts from Haswell, the late ACK should be
+         * applied.
+         * For the small core after Tremont, we have to do the ACK right
+         * before re-enabling counters, which is in the middle of the
+         * NMI handler.
          */
-        if (!x86_pmu.late_ack)
+        if (!late_ack && !mid_ack)
                 apic_write(APIC_LVTPC, APIC_DM_NMI);
         intel_bts_disable_local();
         cpuc->enabled = 0;

@@ -2958,6 +2962,8 @@ again:
                 goto again;
 
 done:
+        if (mid_ack)
+                apic_write(APIC_LVTPC, APIC_DM_NMI);
         /* Only restore PMU state when it's active. See x86_pmu_disable(). */
         cpuc->enabled = pmu_enabled;
         if (pmu_enabled)

@@ -2969,7 +2975,7 @@ done:
          * have been reset. This avoids spurious NMIs on
          * Haswell CPUs.
          */
-        if (x86_pmu.late_ack)
+        if (late_ack)
                 apic_write(APIC_LVTPC, APIC_DM_NMI);
         return handled;
 }

@@ -6129,7 +6135,6 @@ __init int intel_pmu_init(void)
                 static_branch_enable(&perf_is_hybrid);
                 x86_pmu.num_hybrid_pmus = X86_HYBRID_NUM_PMUS;
 
-                x86_pmu.late_ack = true;
                 x86_pmu.pebs_aliases = NULL;
                 x86_pmu.pebs_prec_dist = true;
                 x86_pmu.pebs_block = true;

@@ -6167,6 +6172,7 @@ __init int intel_pmu_init(void)
                 pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_CORE_IDX];
                 pmu->name = "cpu_core";
                 pmu->cpu_type = hybrid_big;
+                pmu->late_ack = true;
                 if (cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) {
                         pmu->num_counters = x86_pmu.num_counters + 2;
                         pmu->num_counters_fixed = x86_pmu.num_counters_fixed + 1;

@@ -6192,6 +6198,7 @@ __init int intel_pmu_init(void)
                 pmu = &x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX];
                 pmu->name = "cpu_atom";
                 pmu->cpu_type = hybrid_small;
+                pmu->mid_ack = true;
                 pmu->num_counters = x86_pmu.num_counters;
                 pmu->num_counters_fixed = x86_pmu.num_counters_fixed;
                 pmu->max_pebs_events = x86_pmu.max_pebs_events;
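Taken together, these hunks replace the single global x86_pmu.late_ack switch with per-PMU late_ack/mid_ack bits: early ACK only when neither is set (old platforms), late ACK after the counters have been re-enabled (big cores, cpu_core), and mid ACK right before re-enabling (small cores, cpu_atom) so that a stale PEBS record is not kept around. The sketch below only models that ordering; it is not kernel code, and the printfs stand in for the real disable/handle/re-enable work inside the NMI handler.

#include <stdio.h>
#include <stdbool.h>

struct pmu_model {
        const char *name;
        bool late_ack;   /* big cores from Haswell on */
        bool mid_ack;    /* small cores after Tremont */
};

static void ack_lvtpc(void)
{
        printf("  apic_write(APIC_LVTPC, APIC_DM_NMI)\n");
}

static void handle_pmi(const struct pmu_model *pmu)
{
        printf("%s:\n", pmu->name);

        /* Early ACK: only when neither late nor mid ACK applies. */
        if (!pmu->late_ack && !pmu->mid_ack)
                ack_lvtpc();

        printf("  disable counters, drain buffers, handle overflows\n");

        /* Mid ACK: right before the counters are re-enabled, so a stale
         * PEBS record is not kept around on the small core. */
        if (pmu->mid_ack)
                ack_lvtpc();

        printf("  re-enable counters\n");

        /* Late ACK: after everything has been reset (avoids spurious NMIs). */
        if (pmu->late_ack)
                ack_lvtpc();
}

int main(void)
{
        struct pmu_model cpu_core = { .name = "cpu_core (big)",   .late_ack = true };
        struct pmu_model cpu_atom = { .name = "cpu_atom (small)", .mid_ack  = true };

        handle_pmi(&cpu_core);
        handle_pmi(&cpu_atom);
        return 0;
}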
arch/x86/events/perf_event.h

@@ -656,6 +656,10 @@ struct x86_hybrid_pmu {
         struct event_constraint        *event_constraints;
         struct event_constraint        *pebs_constraints;
         struct extra_reg               *extra_regs;
+
+        unsigned int                   late_ack        :1,
+                                        mid_ack         :1,
+                                        enabled_ack     :1;
 };
 
 static __always_inline struct x86_hybrid_pmu *hybrid_pmu(struct pmu *pmu)

@@ -686,6 +690,16 @@ extern struct static_key_false perf_is_hybrid;
         __Fp;                                           \
 }))
 
+#define hybrid_bit(_pmu, _field)                        \
+({                                                      \
+        bool __Fp = x86_pmu._field;                     \
+                                                        \
+        if (is_hybrid() && (_pmu))                      \
+                __Fp = hybrid_pmu(_pmu)->_field;        \
+                                                        \
+        __Fp;                                           \
+})
+
 enum hybrid_pmu_type {
         hybrid_big      = 0x40,
         hybrid_small    = 0x20,

@@ -755,6 +769,7 @@ struct x86_pmu {
 
         /* PMI handler bits */
         unsigned int    late_ack                :1,
+                        mid_ack                 :1,
                         enabled_ack             :1;
         /*
          * sysfs attrs

@@ -1115,9 +1130,10 @@ void x86_pmu_stop(struct perf_event *event, int flags);
 
 static inline void x86_pmu_disable_event(struct perf_event *event)
 {
+        u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask);
         struct hw_perf_event *hwc = &event->hw;
 
-        wrmsrl(hwc->config_base, hwc->config);
+        wrmsrl(hwc->config_base, hwc->config & ~disable_mask);
 
         if (is_counter_pair(hwc))
                 wrmsrl(x86_pmu_config_addr(hwc->idx + 1), 0);
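The new hybrid_bit() accessor follows the same pattern as the existing hybrid() macro used above: read the global x86_pmu field, but let the per-PMU x86_hybrid_pmu copy override it when the perf_is_hybrid static key is enabled and a pmu pointer is supplied. Below is a compilable userspace analogue, not kernel code; it relies on the same GCC/clang statement-expression extension, and is_hybrid/x86_pmu here are plain stand-ins.

#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>

struct pmu_bits { bool late_ack, mid_ack; };

static struct pmu_bits x86_pmu = { .late_ack = true };  /* global defaults */
static bool is_hybrid = true;                           /* static-key stand-in */

#define hybrid_bit(_pmu, _field)                \
({                                              \
        bool __Fp = x86_pmu._field;             \
                                                \
        if (is_hybrid && (_pmu))                \
                __Fp = (_pmu)->_field;          \
                                                \
        __Fp;                                   \
})

int main(void)
{
        struct pmu_bits cpu_atom = { .mid_ack = true };
        struct pmu_bits *none = NULL;

        /* No per-PMU structure: fall back to the global x86_pmu fields. */
        printf("default:  late_ack=%d mid_ack=%d\n",
               hybrid_bit(none, late_ack), hybrid_bit(none, mid_ack));

        /* Per-PMU structure supplied: its bits win. */
        printf("cpu_atom: late_ack=%d mid_ack=%d\n",
               hybrid_bit(&cpu_atom, late_ack), hybrid_bit(&cpu_atom, mid_ack));
        return 0;
}

In this model the default path reports late_ack=1/mid_ack=0 from the global, while the cpu_atom structure overrides both bits, which is how the NMI handler above picks up mid_ack on the small-core PMU.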
kernel/events/core.c

@@ -11917,6 +11917,37 @@ again:
         return gctx;
 }
 
+static bool
+perf_check_permission(struct perf_event_attr *attr, struct task_struct *task)
+{
+        unsigned int ptrace_mode = PTRACE_MODE_READ_REALCREDS;
+        bool is_capable = perfmon_capable();
+
+        if (attr->sigtrap) {
+                /*
+                 * perf_event_attr::sigtrap sends signals to the other task.
+                 * Require the current task to also have CAP_KILL.
+                 */
+                rcu_read_lock();
+                is_capable &= ns_capable(__task_cred(task)->user_ns, CAP_KILL);
+                rcu_read_unlock();
+
+                /*
+                 * If the required capabilities aren't available, checks for
+                 * ptrace permissions: upgrade to ATTACH, since sending signals
+                 * can effectively change the target task.
+                 */
+                ptrace_mode = PTRACE_MODE_ATTACH_REALCREDS;
+        }
+
+        /*
+         * Preserve ptrace permission check for backwards compatibility. The
+         * ptrace check also includes checks that the current task and other
+         * task have matching uids, and is therefore not done here explicitly.
+         */
+        return is_capable || ptrace_may_access(task, ptrace_mode);
+}
+
 /**
  * sys_perf_event_open - open a performance event, associate it to a task/cpu
  *

@@ -12163,15 +12194,13 @@ SYSCALL_DEFINE5(perf_event_open,
                         goto err_file;
 
                 /*
-                 * Preserve ptrace permission check for backwards compatibility.
-                 *
                  * We must hold exec_update_lock across this and any potential
                  * perf_install_in_context() call for this new event to
                  * serialize against exec() altering our credentials (and the
                  * perf_event_exit_task() that could imply).
                  */
                 err = -EACCES;
-                if (!perfmon_capable() && !ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS))
+                if (!perf_check_permission(&attr, task))
                         goto err_cred;
         }
 
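From userspace, the permission change is visible when perf_event_attr::sigtrap is set: the event makes the monitored task receive SIGTRAP on overflow, so opening such an event on a different process now additionally requires CAP_KILL in the target's user namespace (or ptrace ATTACH-level access) instead of plain read access. The following self-profiling sketch shows how sigtrap is requested; it is not part of this patch set, assumes a v5.13+ kernel and headers, and the sample_period/sig_data values are arbitrary.

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#include <signal.h>
#include <string.h>
#include <stdio.h>

static int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu,
                           int group_fd, unsigned long flags)
{
        return syscall(SYS_perf_event_open, attr, pid, cpu, group_fd, flags);
}

static void on_sigtrap(int sig, siginfo_t *info, void *ucontext)
{
        (void)sig; (void)info; (void)ucontext;
        /* Keep the handler async-signal-safe: just note the delivery. */
        write(STDOUT_FILENO, "SIGTRAP from perf\n", 18);
}

int main(void)
{
        struct sigaction sa = { 0 };
        sa.sa_sigaction = on_sigtrap;
        sa.sa_flags = SA_SIGINFO;
        sigaction(SIGTRAP, &sa, NULL);

        struct perf_event_attr attr;
        memset(&attr, 0, sizeof(attr));
        attr.size = sizeof(attr);
        attr.type = PERF_TYPE_HARDWARE;
        attr.config = PERF_COUNT_HW_INSTRUCTIONS;
        attr.sample_period = 100000000;  /* SIGTRAP roughly every 100M instructions */
        attr.sigtrap = 1;                /* deliver SIGTRAP to the monitored task */
        attr.remove_on_exec = 1;         /* sigtrap events must also set this */
        attr.sig_data = 0x1234;          /* opaque value passed along with the signal */
        attr.exclude_kernel = 1;
        attr.exclude_hv = 1;

        /* pid = 0: monitor ourselves. Monitoring another pid with sigtrap set
         * is the case that now requires CAP_KILL or ptrace ATTACH permission. */
        int fd = perf_event_open(&attr, 0, -1, -1, 0);
        if (fd < 0) {
                perror("perf_event_open");
                return 1;
        }

        for (volatile unsigned long i = 0; i < 1000000000UL; i++)
                ;  /* burn instructions until samples fire */

        return 0;
}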