perf/core: Fix missing read event generation on task exit

For events with inherit_stat enabled, a "read" event will be generated
to collect per-task event counts on task exit.

The call chain is as follows:

do_exit
  -> perf_event_exit_task
    -> perf_event_exit_task_context
      -> perf_event_exit_event
        -> perf_remove_from_context
          -> perf_child_detach
            -> sync_child_event
              -> perf_event_read_event

However, the child event context detaches the task too early in
perf_event_exit_task_context, which causes sync_child_event to never
generate the read event in this case, since child_event->ctx->task is
always set to TASK_TOMBSTONE. Fix that by moving the context lock section
backward to ensure ctx->task is not set to TASK_TOMBSTONE before the read
event is generated.

Because perf_event_free_task calls perf_event_exit_task_context with
exit = false to tear down all child events from the context, and the
task never lived, accessing the task PID can lead to a use-after-free.

To fix that, let sync_child_event read the task from an argument and move
the call to the only place where it should be triggered, so that it is not
affected by ctx->task being set to TASK_TOMBSTONE. Also add a task
parameter to perf_event_exit_event so that sync_child_event is triggered
properly when needed.

This bug can be reproduced by running "perf record -s" and attaching to
any program that generates perf events in its child tasks. If we check
the result with "perf report -T", the report ends with an empty table
that contains only the header "# PID  TID"; by design, this table is
expected to contain the per-task event counts.

Fixes: ef54c1a476 ("perf: Rework perf_event_exit_event()")
Signed-off-by: Thaumy Cheng <thaumy.love@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Ian Rogers <irogers@google.com>
Cc: James Clark <james.clark@linaro.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: linux-perf-users@vger.kernel.org
Link: https://patch.msgid.link/20251209041600.963586-1-thaumy.love@gmail.com
pull/1354/merge
Thaumy Cheng 2025-12-09 12:16:00 +08:00 committed by Ingo Molnar
parent 0143928651
commit c418d8b4d7
1 changed files with 12 additions and 10 deletions

View File

@ -2317,8 +2317,6 @@ out:
perf_event__header_size(leader); perf_event__header_size(leader);
} }
static void sync_child_event(struct perf_event *child_event);
static void perf_child_detach(struct perf_event *event) static void perf_child_detach(struct perf_event *event)
{ {
struct perf_event *parent_event = event->parent; struct perf_event *parent_event = event->parent;
@ -2337,7 +2335,6 @@ static void perf_child_detach(struct perf_event *event)
lockdep_assert_held(&parent_event->child_mutex); lockdep_assert_held(&parent_event->child_mutex);
*/ */
sync_child_event(event);
list_del_init(&event->child_list); list_del_init(&event->child_list);
} }
@ -4588,6 +4585,7 @@ out:
static void perf_remove_from_owner(struct perf_event *event); static void perf_remove_from_owner(struct perf_event *event);
static void perf_event_exit_event(struct perf_event *event, static void perf_event_exit_event(struct perf_event *event,
struct perf_event_context *ctx, struct perf_event_context *ctx,
struct task_struct *task,
bool revoke); bool revoke);
/* /*
@ -4615,7 +4613,7 @@ static void perf_event_remove_on_exec(struct perf_event_context *ctx)
modified = true; modified = true;
perf_event_exit_event(event, ctx, false); perf_event_exit_event(event, ctx, ctx->task, false);
} }
raw_spin_lock_irqsave(&ctx->lock, flags); raw_spin_lock_irqsave(&ctx->lock, flags);
@ -12518,7 +12516,7 @@ static void __pmu_detach_event(struct pmu *pmu, struct perf_event *event,
/* /*
* De-schedule the event and mark it REVOKED. * De-schedule the event and mark it REVOKED.
*/ */
perf_event_exit_event(event, ctx, true); perf_event_exit_event(event, ctx, ctx->task, true);
/* /*
* All _free_event() bits that rely on event->pmu: * All _free_event() bits that rely on event->pmu:
@ -14075,14 +14073,13 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
} }
EXPORT_SYMBOL_GPL(perf_pmu_migrate_context); EXPORT_SYMBOL_GPL(perf_pmu_migrate_context);
static void sync_child_event(struct perf_event *child_event) static void sync_child_event(struct perf_event *child_event,
struct task_struct *task)
{ {
struct perf_event *parent_event = child_event->parent; struct perf_event *parent_event = child_event->parent;
u64 child_val; u64 child_val;
if (child_event->attr.inherit_stat) { if (child_event->attr.inherit_stat) {
struct task_struct *task = child_event->ctx->task;
if (task && task != TASK_TOMBSTONE) if (task && task != TASK_TOMBSTONE)
perf_event_read_event(child_event, task); perf_event_read_event(child_event, task);
} }
@ -14101,7 +14098,9 @@ static void sync_child_event(struct perf_event *child_event)
static void static void
perf_event_exit_event(struct perf_event *event, perf_event_exit_event(struct perf_event *event,
struct perf_event_context *ctx, bool revoke) struct perf_event_context *ctx,
struct task_struct *task,
bool revoke)
{ {
struct perf_event *parent_event = event->parent; struct perf_event *parent_event = event->parent;
unsigned long detach_flags = DETACH_EXIT; unsigned long detach_flags = DETACH_EXIT;
@ -14124,6 +14123,9 @@ perf_event_exit_event(struct perf_event *event,
mutex_lock(&parent_event->child_mutex); mutex_lock(&parent_event->child_mutex);
/* PERF_ATTACH_ITRACE might be set concurrently */ /* PERF_ATTACH_ITRACE might be set concurrently */
attach_state = READ_ONCE(event->attach_state); attach_state = READ_ONCE(event->attach_state);
if (attach_state & PERF_ATTACH_CHILD)
sync_child_event(event, task);
} }
if (revoke) if (revoke)
@ -14215,7 +14217,7 @@ static void perf_event_exit_task_context(struct task_struct *task, bool exit)
perf_event_task(task, ctx, 0); perf_event_task(task, ctx, 0);
list_for_each_entry_safe(child_event, next, &ctx->event_list, event_entry) list_for_each_entry_safe(child_event, next, &ctx->event_list, event_entry)
perf_event_exit_event(child_event, ctx, false); perf_event_exit_event(child_event, ctx, exit ? task : NULL, false);
mutex_unlock(&ctx->mutex); mutex_unlock(&ctx->mutex);