perf/core: Fix race between close() and fork()
Syzkaller reported the following Use-after-Free bug: close() clone() copy_process() perf_event_init_task() perf_event_init_context() mutex_lock(parent_ctx->mutex) inherit_task_group() inherit_group() inherit_event() mutex_lock(event->child_mutex) // expose event on child list list_add_tail() mutex_unlock(event->child_mutex) mutex_unlock(parent_ctx->mutex) ... goto bad_fork_* bad_fork_cleanup_perf: perf_event_free_task() perf_release() perf_event_release_kernel() list_for_each_entry() mutex_lock(ctx->mutex) mutex_lock(event->child_mutex) // event is from the failing inherit // on the other CPU perf_remove_from_context() list_move() mutex_unlock(event->child_mutex) mutex_unlock(ctx->mutex) mutex_lock(ctx->mutex) list_for_each_entry_safe() // event already stolen mutex_unlock(ctx->mutex) delayed_free_task() free_task() list_for_each_entry_safe() list_del() free_event() _free_event() // and so event->hw.target // is the already freed failed clone() if (event->hw.target) put_task_struct(event->hw.target) // WHOOPSIE, already quite dead Which puts the lie to the comment on perf_event_free_task(): 'unexposed, unused context' not so much. Which is a 'fun' confluence of fail; copy_process() doing an unconditional free_task() and not respecting refcounts, and perf having creative locking. In particular: 82d94856fa
("perf/core: Fix lock inversion between perf,trace,cpuhp") seems to have overlooked this 'fun' parade. Solve it by using the fact that detached events still have a reference count on their (previous) context. With this perf_event_free_task() can detect when events have escaped and wait for their destruction. Debugged-by: Alexander Shishkin <alexander.shishkin@linux.intel.com> Reported-by: syzbot+a24c397a29ad22d86c98@syzkaller.appspotmail.com Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Acked-by: Mark Rutland <mark.rutland@arm.com> Cc: <stable@vger.kernel.org> Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com> Cc: Arnaldo Carvalho de Melo <acme@redhat.com> Cc: Jiri Olsa <jolsa@redhat.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Stephane Eranian <eranian@google.com> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Vince Weaver <vincent.weaver@maine.edu> Fixes: 82d94856fa
("perf/core: Fix lock inversion between perf,trace,cpuhp") Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
parent
e5eb08ac81
commit
1cf8dfe8a6
|
@ -4465,12 +4465,20 @@ static void _free_event(struct perf_event *event)
|
|||
if (event->destroy)
|
||||
event->destroy(event);
|
||||
|
||||
if (event->ctx)
|
||||
put_ctx(event->ctx);
|
||||
|
||||
/*
|
||||
* Must be after ->destroy(), due to uprobe_perf_close() using
|
||||
* hw.target.
|
||||
*/
|
||||
if (event->hw.target)
|
||||
put_task_struct(event->hw.target);
|
||||
|
||||
/*
|
||||
* perf_event_free_task() relies on put_ctx() being 'last', in particular
|
||||
* all task references must be cleaned up.
|
||||
*/
|
||||
if (event->ctx)
|
||||
put_ctx(event->ctx);
|
||||
|
||||
exclusive_event_destroy(event);
|
||||
module_put(event->pmu->module);
|
||||
|
||||
|
@ -4650,8 +4658,17 @@ again:
|
|||
mutex_unlock(&event->child_mutex);
|
||||
|
||||
list_for_each_entry_safe(child, tmp, &free_list, child_list) {
|
||||
void *var = &child->ctx->refcount;
|
||||
|
||||
list_del(&child->child_list);
|
||||
free_event(child);
|
||||
|
||||
/*
|
||||
* Wake any perf_event_free_task() waiting for this event to be
|
||||
* freed.
|
||||
*/
|
||||
smp_mb(); /* pairs with wait_var_event() */
|
||||
wake_up_var(var);
|
||||
}
|
||||
|
||||
no_ctx:
|
||||
|
@ -11527,11 +11544,11 @@ static void perf_free_event(struct perf_event *event,
|
|||
}
|
||||
|
||||
/*
|
||||
* Free an unexposed, unused context as created by inheritance by
|
||||
* perf_event_init_task below, used by fork() in case of fail.
|
||||
* Free a context as created by inheritance by perf_event_init_task() below,
|
||||
* used by fork() in case of fail.
|
||||
*
|
||||
* Not all locks are strictly required, but take them anyway to be nice and
|
||||
* help out with the lockdep assertions.
|
||||
* Even though the task has never lived, the context and events have been
|
||||
* exposed through the child_list, so we must take care tearing it all down.
|
||||
*/
|
||||
void perf_event_free_task(struct task_struct *task)
|
||||
{
|
||||
|
@ -11561,7 +11578,23 @@ void perf_event_free_task(struct task_struct *task)
|
|||
perf_free_event(event, ctx);
|
||||
|
||||
mutex_unlock(&ctx->mutex);
|
||||
put_ctx(ctx);
|
||||
|
||||
/*
|
||||
* perf_event_release_kernel() could've stolen some of our
|
||||
* child events and still have them on its free_list. In that
|
||||
* case we must wait for these events to have been freed (in
|
||||
* particular all their references to this task must've been
|
||||
* dropped).
|
||||
*
|
||||
* Without this copy_process() will unconditionally free this
|
||||
* task (irrespective of its reference count) and
|
||||
* _free_event()'s put_task_struct(event->hw.target) will be a
|
||||
* use-after-free.
|
||||
*
|
||||
* Wait for all events to drop their context reference.
|
||||
*/
|
||||
wait_var_event(&ctx->refcount, refcount_read(&ctx->refcount) == 1);
|
||||
put_ctx(ctx); /* must be last */
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue