locking: Add task::blocked_lock to serialize blocked_on state
So far, we have been able to utilize the mutex::wait_lock for serializing the blocked_on state, but when we move to proxying across runqueues, we will need to add more state and a way to serialize changes to this state in contexts where we don't hold the mutex::wait_lock.

So introduce the task::blocked_lock, which nests under the mutex::wait_lock in the locking order, and rework the locking to use it.

Signed-off-by: John Stultz <jstultz@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: K Prateek Nayak <kprateek.nayak@amd.com>
Link: https://patch.msgid.link/20260324191337.1841376-5-jstultz@google.com

master
parent
f4fe6be82e
commit
fa4a1ff8ab
|
|
@ -1238,6 +1238,7 @@ struct task_struct {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
struct mutex *blocked_on; /* lock we're blocked on */
|
struct mutex *blocked_on; /* lock we're blocked on */
|
||||||
|
raw_spinlock_t blocked_lock;
|
||||||
|
|
||||||
#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
|
#ifdef CONFIG_DETECT_HUNG_TASK_BLOCKER
|
||||||
/*
|
/*
|
||||||
|
|
@ -2181,57 +2182,42 @@ extern int __cond_resched_rwlock_write(rwlock_t *lock) __must_hold(lock);
|
||||||
#ifndef CONFIG_PREEMPT_RT
|
#ifndef CONFIG_PREEMPT_RT
|
||||||
static inline struct mutex *__get_task_blocked_on(struct task_struct *p)
|
static inline struct mutex *__get_task_blocked_on(struct task_struct *p)
|
||||||
{
|
{
|
||||||
struct mutex *m = p->blocked_on;
|
lockdep_assert_held_once(&p->blocked_lock);
|
||||||
|
return p->blocked_on;
|
||||||
if (m)
|
|
||||||
lockdep_assert_held_once(&m->wait_lock);
|
|
||||||
return m;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void __set_task_blocked_on(struct task_struct *p, struct mutex *m)
|
static inline void __set_task_blocked_on(struct task_struct *p, struct mutex *m)
|
||||||
{
|
{
|
||||||
struct mutex *blocked_on = READ_ONCE(p->blocked_on);
|
|
||||||
|
|
||||||
WARN_ON_ONCE(!m);
|
WARN_ON_ONCE(!m);
|
||||||
/* The task should only be setting itself as blocked */
|
/* The task should only be setting itself as blocked */
|
||||||
WARN_ON_ONCE(p != current);
|
WARN_ON_ONCE(p != current);
|
||||||
/* Currently we serialize blocked_on under the mutex::wait_lock */
|
/* Currently we serialize blocked_on under the task::blocked_lock */
|
||||||
lockdep_assert_held_once(&m->wait_lock);
|
lockdep_assert_held_once(&p->blocked_lock);
|
||||||
/*
|
/*
|
||||||
* Check ensure we don't overwrite existing mutex value
|
* Check ensure we don't overwrite existing mutex value
|
||||||
* with a different mutex. Note, setting it to the same
|
* with a different mutex. Note, setting it to the same
|
||||||
* lock repeatedly is ok.
|
* lock repeatedly is ok.
|
||||||
*/
|
*/
|
||||||
WARN_ON_ONCE(blocked_on && blocked_on != m);
|
WARN_ON_ONCE(p->blocked_on && p->blocked_on != m);
|
||||||
WRITE_ONCE(p->blocked_on, m);
|
p->blocked_on = m;
|
||||||
}
|
|
||||||
|
|
||||||
static inline void set_task_blocked_on(struct task_struct *p, struct mutex *m)
|
|
||||||
{
|
|
||||||
guard(raw_spinlock_irqsave)(&m->wait_lock);
|
|
||||||
__set_task_blocked_on(p, m);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void __clear_task_blocked_on(struct task_struct *p, struct mutex *m)
|
static inline void __clear_task_blocked_on(struct task_struct *p, struct mutex *m)
|
||||||
{
|
{
|
||||||
if (m) {
|
/* Currently we serialize blocked_on under the task::blocked_lock */
|
||||||
struct mutex *blocked_on = READ_ONCE(p->blocked_on);
|
lockdep_assert_held_once(&p->blocked_lock);
|
||||||
|
|
||||||
/* Currently we serialize blocked_on under the mutex::wait_lock */
|
|
||||||
lockdep_assert_held_once(&m->wait_lock);
|
|
||||||
/*
|
/*
|
||||||
* There may be cases where we re-clear already cleared
|
* There may be cases where we re-clear already cleared
|
||||||
* blocked_on relationships, but make sure we are not
|
* blocked_on relationships, but make sure we are not
|
||||||
* clearing the relationship with a different lock.
|
* clearing the relationship with a different lock.
|
||||||
*/
|
*/
|
||||||
WARN_ON_ONCE(blocked_on && blocked_on != m);
|
WARN_ON_ONCE(m && p->blocked_on && p->blocked_on != m);
|
||||||
}
|
p->blocked_on = NULL;
|
||||||
WRITE_ONCE(p->blocked_on, NULL);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void clear_task_blocked_on(struct task_struct *p, struct mutex *m)
|
static inline void clear_task_blocked_on(struct task_struct *p, struct mutex *m)
|
||||||
{
|
{
|
||||||
guard(raw_spinlock_irqsave)(&m->wait_lock);
|
guard(raw_spinlock_irqsave)(&p->blocked_lock);
|
||||||
__clear_task_blocked_on(p, m);
|
__clear_task_blocked_on(p, m);
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
|
|
||||||
|
|
@ -169,6 +169,7 @@ struct task_struct init_task __aligned(L1_CACHE_BYTES) = {
|
||||||
.journal_info = NULL,
|
.journal_info = NULL,
|
||||||
INIT_CPU_TIMERS(init_task)
|
INIT_CPU_TIMERS(init_task)
|
||||||
.pi_lock = __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock),
|
.pi_lock = __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock),
|
||||||
|
.blocked_lock = __RAW_SPIN_LOCK_UNLOCKED(init_task.blocked_lock),
|
||||||
.timer_slack_ns = 50000, /* 50 usec default slack */
|
.timer_slack_ns = 50000, /* 50 usec default slack */
|
||||||
.thread_pid = &init_struct_pid,
|
.thread_pid = &init_struct_pid,
|
||||||
.thread_node = LIST_HEAD_INIT(init_signals.thread_head),
|
.thread_node = LIST_HEAD_INIT(init_signals.thread_head),
|
||||||
|
|
|
||||||
|
|
@ -2076,6 +2076,7 @@ __latent_entropy struct task_struct *copy_process(
|
||||||
ftrace_graph_init_task(p);
|
ftrace_graph_init_task(p);
|
||||||
|
|
||||||
rt_mutex_init_task(p);
|
rt_mutex_init_task(p);
|
||||||
|
raw_spin_lock_init(&p->blocked_lock);
|
||||||
|
|
||||||
lockdep_assert_irqs_enabled();
|
lockdep_assert_irqs_enabled();
|
||||||
#ifdef CONFIG_PROVE_LOCKING
|
#ifdef CONFIG_PROVE_LOCKING
|
||||||
|
|
|
||||||
|
|
@ -54,13 +54,13 @@ void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter,
|
||||||
lockdep_assert_held(&lock->wait_lock);
|
lockdep_assert_held(&lock->wait_lock);
|
||||||
|
|
||||||
/* Current thread can't be already blocked (since it's executing!) */
|
/* Current thread can't be already blocked (since it's executing!) */
|
||||||
DEBUG_LOCKS_WARN_ON(__get_task_blocked_on(task));
|
DEBUG_LOCKS_WARN_ON(get_task_blocked_on(task));
|
||||||
}
|
}
|
||||||
|
|
||||||
void debug_mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
|
void debug_mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter,
|
||||||
struct task_struct *task)
|
struct task_struct *task)
|
||||||
{
|
{
|
||||||
struct mutex *blocked_on = __get_task_blocked_on(task);
|
struct mutex *blocked_on = get_task_blocked_on(task);
|
||||||
|
|
||||||
DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
|
DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list));
|
||||||
DEBUG_LOCKS_WARN_ON(waiter->task != task);
|
DEBUG_LOCKS_WARN_ON(waiter->task != task);
|
||||||
|
|
|
||||||
|
|
@ -656,6 +656,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
|
||||||
goto err_early_kill;
|
goto err_early_kill;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
raw_spin_lock(&current->blocked_lock);
|
||||||
__set_task_blocked_on(current, lock);
|
__set_task_blocked_on(current, lock);
|
||||||
set_current_state(state);
|
set_current_state(state);
|
||||||
trace_contention_begin(lock, LCB_F_MUTEX);
|
trace_contention_begin(lock, LCB_F_MUTEX);
|
||||||
|
|
@ -669,8 +670,9 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
|
||||||
* the handoff.
|
* the handoff.
|
||||||
*/
|
*/
|
||||||
if (__mutex_trylock(lock))
|
if (__mutex_trylock(lock))
|
||||||
goto acquired;
|
break;
|
||||||
|
|
||||||
|
raw_spin_unlock(&current->blocked_lock);
|
||||||
/*
|
/*
|
||||||
* Check for signals and kill conditions while holding
|
* Check for signals and kill conditions while holding
|
||||||
* wait_lock. This ensures the lock cancellation is ordered
|
* wait_lock. This ensures the lock cancellation is ordered
|
||||||
|
|
@ -693,12 +695,14 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
|
||||||
|
|
||||||
first = __mutex_waiter_is_first(lock, &waiter);
|
first = __mutex_waiter_is_first(lock, &waiter);
|
||||||
|
|
||||||
|
raw_spin_lock_irqsave(&lock->wait_lock, flags);
|
||||||
|
raw_spin_lock(&current->blocked_lock);
|
||||||
/*
|
/*
|
||||||
* As we likely have been woken up by task
|
* As we likely have been woken up by task
|
||||||
* that has cleared our blocked_on state, re-set
|
* that has cleared our blocked_on state, re-set
|
||||||
* it to the lock we are trying to acquire.
|
* it to the lock we are trying to acquire.
|
||||||
*/
|
*/
|
||||||
set_task_blocked_on(current, lock);
|
__set_task_blocked_on(current, lock);
|
||||||
set_current_state(state);
|
set_current_state(state);
|
||||||
/*
|
/*
|
||||||
* Here we order against unlock; we must either see it change
|
* Here we order against unlock; we must either see it change
|
||||||
|
|
@ -709,25 +713,33 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas
|
||||||
break;
|
break;
|
||||||
|
|
||||||
if (first) {
|
if (first) {
|
||||||
trace_contention_begin(lock, LCB_F_MUTEX | LCB_F_SPIN);
|
bool opt_acquired;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* mutex_optimistic_spin() can call schedule(), so
|
* mutex_optimistic_spin() can call schedule(), so
|
||||||
* clear blocked on so we don't become unselectable
|
* we need to release these locks before calling it,
|
||||||
|
* and clear blocked on so we don't become unselectable
|
||||||
* to run.
|
* to run.
|
||||||
*/
|
*/
|
||||||
clear_task_blocked_on(current, lock);
|
__clear_task_blocked_on(current, lock);
|
||||||
if (mutex_optimistic_spin(lock, ww_ctx, &waiter))
|
raw_spin_unlock(&current->blocked_lock);
|
||||||
break;
|
raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
|
||||||
set_task_blocked_on(current, lock);
|
|
||||||
trace_contention_begin(lock, LCB_F_MUTEX);
|
trace_contention_begin(lock, LCB_F_MUTEX | LCB_F_SPIN);
|
||||||
}
|
opt_acquired = mutex_optimistic_spin(lock, ww_ctx, &waiter);
|
||||||
|
|
||||||
raw_spin_lock_irqsave(&lock->wait_lock, flags);
|
raw_spin_lock_irqsave(&lock->wait_lock, flags);
|
||||||
|
raw_spin_lock(&current->blocked_lock);
|
||||||
|
__set_task_blocked_on(current, lock);
|
||||||
|
|
||||||
|
if (opt_acquired)
|
||||||
|
break;
|
||||||
|
trace_contention_begin(lock, LCB_F_MUTEX);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
raw_spin_lock_irqsave(&lock->wait_lock, flags);
|
|
||||||
acquired:
|
|
||||||
__clear_task_blocked_on(current, lock);
|
__clear_task_blocked_on(current, lock);
|
||||||
__set_current_state(TASK_RUNNING);
|
__set_current_state(TASK_RUNNING);
|
||||||
|
raw_spin_unlock(&current->blocked_lock);
|
||||||
|
|
||||||
if (ww_ctx) {
|
if (ww_ctx) {
|
||||||
/*
|
/*
|
||||||
|
|
@ -756,11 +768,11 @@ skip_wait:
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
err:
|
err:
|
||||||
__clear_task_blocked_on(current, lock);
|
clear_task_blocked_on(current, lock);
|
||||||
__set_current_state(TASK_RUNNING);
|
__set_current_state(TASK_RUNNING);
|
||||||
__mutex_remove_waiter(lock, &waiter);
|
__mutex_remove_waiter(lock, &waiter);
|
||||||
err_early_kill:
|
err_early_kill:
|
||||||
WARN_ON(__get_task_blocked_on(current));
|
WARN_ON(get_task_blocked_on(current));
|
||||||
trace_contention_end(lock, ret);
|
trace_contention_end(lock, ret);
|
||||||
raw_spin_unlock_irqrestore_wake(&lock->wait_lock, flags, &wake_q);
|
raw_spin_unlock_irqrestore_wake(&lock->wait_lock, flags, &wake_q);
|
||||||
debug_mutex_free_waiter(&waiter);
|
debug_mutex_free_waiter(&waiter);
|
||||||
|
|
@ -971,7 +983,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne
|
||||||
next = waiter->task;
|
next = waiter->task;
|
||||||
|
|
||||||
debug_mutex_wake_waiter(lock, waiter);
|
debug_mutex_wake_waiter(lock, waiter);
|
||||||
__clear_task_blocked_on(next, lock);
|
clear_task_blocked_on(next, lock);
|
||||||
wake_q_add(&wake_q, next);
|
wake_q_add(&wake_q, next);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -47,6 +47,12 @@ static inline struct task_struct *__mutex_owner(struct mutex *lock)
|
||||||
return (struct task_struct *)(atomic_long_read(&lock->owner) & ~MUTEX_FLAGS);
|
return (struct task_struct *)(atomic_long_read(&lock->owner) & ~MUTEX_FLAGS);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline struct mutex *get_task_blocked_on(struct task_struct *p)
|
||||||
|
{
|
||||||
|
guard(raw_spinlock_irqsave)(&p->blocked_lock);
|
||||||
|
return __get_task_blocked_on(p);
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_DEBUG_MUTEXES
|
#ifdef CONFIG_DEBUG_MUTEXES
|
||||||
extern void debug_mutex_lock_common(struct mutex *lock,
|
extern void debug_mutex_lock_common(struct mutex *lock,
|
||||||
struct mutex_waiter *waiter);
|
struct mutex_waiter *waiter);
|
||||||
|
|
|
||||||
|
|
@ -289,7 +289,7 @@ __ww_mutex_die(struct MUTEX *lock, struct MUTEX_WAITER *waiter,
|
||||||
* blocked_on pointer. Otherwise we can see circular
|
* blocked_on pointer. Otherwise we can see circular
|
||||||
* blocked_on relationships that can't resolve.
|
* blocked_on relationships that can't resolve.
|
||||||
*/
|
*/
|
||||||
__clear_task_blocked_on(waiter->task, lock);
|
clear_task_blocked_on(waiter->task, lock);
|
||||||
wake_q_add(wake_q, waiter->task);
|
wake_q_add(wake_q, waiter->task);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -347,7 +347,7 @@ static bool __ww_mutex_wound(struct MUTEX *lock,
|
||||||
* are waking the mutex owner, who may be currently
|
* are waking the mutex owner, who may be currently
|
||||||
* blocked on a different mutex.
|
* blocked on a different mutex.
|
||||||
*/
|
*/
|
||||||
__clear_task_blocked_on(owner, NULL);
|
clear_task_blocked_on(owner, NULL);
|
||||||
wake_q_add(wake_q, owner);
|
wake_q_add(wake_q, owner);
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
|
|
|
||||||
|
|
@ -6584,6 +6584,7 @@ static struct task_struct *proxy_deactivate(struct rq *rq, struct task_struct *d
|
||||||
* p->pi_lock
|
* p->pi_lock
|
||||||
* rq->lock
|
* rq->lock
|
||||||
* mutex->wait_lock
|
* mutex->wait_lock
|
||||||
|
* p->blocked_lock
|
||||||
*
|
*
|
||||||
* Returns the task that is going to be used as execution context (the one
|
* Returns the task that is going to be used as execution context (the one
|
||||||
* that is actually going to be run on cpu_of(rq)).
|
* that is actually going to be run on cpu_of(rq)).
|
||||||
|
|
@ -6603,8 +6604,9 @@ find_proxy_task(struct rq *rq, struct task_struct *donor, struct rq_flags *rf)
|
||||||
* and ensure @owner sticks around.
|
* and ensure @owner sticks around.
|
||||||
*/
|
*/
|
||||||
guard(raw_spinlock)(&mutex->wait_lock);
|
guard(raw_spinlock)(&mutex->wait_lock);
|
||||||
|
guard(raw_spinlock)(&p->blocked_lock);
|
||||||
|
|
||||||
/* Check again that p is blocked with wait_lock held */
|
/* Check again that p is blocked with blocked_lock held */
|
||||||
if (mutex != __get_task_blocked_on(p)) {
|
if (mutex != __get_task_blocked_on(p)) {
|
||||||
/*
|
/*
|
||||||
* Something changed in the blocked_on chain and
|
* Something changed in the blocked_on chain and
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue