perf: Fix owner-list vs exit

Oleg noticed that a perf-fd keeping a reference on the creating task
leads to a few funny side effects.

There's two different aspects to this:

  - kernel based perf-events, these should not take out
    a reference on the creating task and appear on the task's
    event list since they're not bound to fds nor visible
    to userspace.

  - fork() and pthread_create(), these can lead to the creating
    task dying (and thus the task's event-list becomming useless)
    but keeping the list and ref alive until the event is closed.

Combined they lead to malfunction of the ptrace hw_tracepoints.

Cure this by not considering kernel based perf_events for the
owner-list and destroying the owner-list when the owner dies.

Reported-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Oleg Nesterov <oleg@redhat.com>
LKML-Reference: <1289576883.2084.286.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Peter Zijlstra 2010-11-09 19:01:43 +01:00 committed by Ingo Molnar
parent fcf48a725a
commit 8882135bcd
1 changed files with 51 additions and 12 deletions

View File

@ -2235,11 +2235,6 @@ int perf_event_release_kernel(struct perf_event *event)
raw_spin_unlock_irq(&ctx->lock); raw_spin_unlock_irq(&ctx->lock);
mutex_unlock(&ctx->mutex); mutex_unlock(&ctx->mutex);
mutex_lock(&event->owner->perf_event_mutex);
list_del_init(&event->owner_entry);
mutex_unlock(&event->owner->perf_event_mutex);
put_task_struct(event->owner);
free_event(event); free_event(event);
return 0; return 0;
@ -2252,9 +2247,43 @@ EXPORT_SYMBOL_GPL(perf_event_release_kernel);
static int perf_release(struct inode *inode, struct file *file) static int perf_release(struct inode *inode, struct file *file)
{ {
struct perf_event *event = file->private_data; struct perf_event *event = file->private_data;
struct task_struct *owner;
file->private_data = NULL; file->private_data = NULL;
rcu_read_lock();
owner = ACCESS_ONCE(event->owner);
/*
* Matches the smp_wmb() in perf_event_exit_task(). If we observe
* !owner it means the list deletion is complete and we can indeed
* free this event, otherwise we need to serialize on
* owner->perf_event_mutex.
*/
smp_read_barrier_depends();
if (owner) {
/*
* Since delayed_put_task_struct() also drops the last
* task reference we can safely take a new reference
* while holding the rcu_read_lock().
*/
get_task_struct(owner);
}
rcu_read_unlock();
if (owner) {
mutex_lock(&owner->perf_event_mutex);
/*
* We have to re-check the event->owner field, if it is cleared
* we raced with perf_event_exit_task(), acquiring the mutex
* ensured they're done, and we can proceed with freeing the
* event.
*/
if (event->owner)
list_del_init(&event->owner_entry);
mutex_unlock(&owner->perf_event_mutex);
put_task_struct(owner);
}
return perf_event_release_kernel(event); return perf_event_release_kernel(event);
} }
@ -5678,7 +5707,7 @@ SYSCALL_DEFINE5(perf_event_open,
mutex_unlock(&ctx->mutex); mutex_unlock(&ctx->mutex);
event->owner = current; event->owner = current;
get_task_struct(current);
mutex_lock(&current->perf_event_mutex); mutex_lock(&current->perf_event_mutex);
list_add_tail(&event->owner_entry, &current->perf_event_list); list_add_tail(&event->owner_entry, &current->perf_event_list);
mutex_unlock(&current->perf_event_mutex); mutex_unlock(&current->perf_event_mutex);
@ -5746,12 +5775,6 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
++ctx->generation; ++ctx->generation;
mutex_unlock(&ctx->mutex); mutex_unlock(&ctx->mutex);
event->owner = current;
get_task_struct(current);
mutex_lock(&current->perf_event_mutex);
list_add_tail(&event->owner_entry, &current->perf_event_list);
mutex_unlock(&current->perf_event_mutex);
return event; return event;
err_free: err_free:
@ -5902,8 +5925,24 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
*/ */
void perf_event_exit_task(struct task_struct *child) void perf_event_exit_task(struct task_struct *child)
{ {
struct perf_event *event, *tmp;
int ctxn; int ctxn;
mutex_lock(&child->perf_event_mutex);
list_for_each_entry_safe(event, tmp, &child->perf_event_list,
owner_entry) {
list_del_init(&event->owner_entry);
/*
* Ensure the list deletion is visible before we clear
* the owner, closes a race against perf_release() where
* we need to serialize on the owner->perf_event_mutex.
*/
smp_wmb();
event->owner = NULL;
}
mutex_unlock(&child->perf_event_mutex);
for_each_task_context_nr(ctxn) for_each_task_context_nr(ctxn)
perf_event_exit_task_context(child, ctxn); perf_event_exit_task_context(child, ctxn);
} }