sched_ext: Fix bypass depth leak on scx_enable() failure
scx_enable() calls scx_bypass(true) to initialize in bypass mode and then
scx_bypass(false) on success to exit. If scx_enable() fails during task
initialization - e.g. scx_cgroup_init() or scx_init_task() returns an error -
it jumps to err_disable while bypass is still active. scx_disable_workfn()
then performs its own balanced scx_bypass(true)/scx_bypass(false) pair, which
leaves the bypass depth at 1 instead of 0. As a result, the system remains
permanently in bypass mode after a failed scx_enable().
Failure paths reached after task initialization is complete - e.g.
scx_tryset_enable_state() at the end - run after scx_bypass(false) has already
been called and are not affected. The leak only affects a subset of failure modes.
Fix it by using a bool to track whether scx_enable() has an outstanding
scx_bypass(true) and having scx_disable_workfn() call an extra
scx_bypass(false) to balance it. This is a temporary measure, as the bypass
depth will be moved into the sched instance, which will make this tracking
unnecessary.
Fixes: 8c2090c504 ("sched_ext: Initialize in bypass mode")
Cc: stable@vger.kernel.org # v6.12+
Reported-by: Chris Mason <clm@meta.com>
Reviewed-by: Emil Tsalapatis <emil@etsalapatis.com>
Link: https://lore.kernel.org/stable/286e6f7787a81239e1ce2989b52391ce%40kernel.org
Signed-off-by: Tejun Heo <tj@kernel.org>
master
parent
12b5cd99a0
commit
9f769637a9
|
|
@ -41,6 +41,13 @@ static bool scx_init_task_enabled;
|
|||
static bool scx_switching_all;
|
||||
DEFINE_STATIC_KEY_FALSE(__scx_switched_all);
|
||||
|
||||
/*
|
||||
* Tracks whether scx_enable() called scx_bypass(true). Used to balance bypass
|
||||
* depth on enable failure. Will be removed when bypass depth is moved into the
|
||||
* sched instance.
|
||||
*/
|
||||
static bool scx_bypassed_for_enable;
|
||||
|
||||
static atomic_long_t scx_nr_rejected = ATOMIC_LONG_INIT(0);
|
||||
static atomic_long_t scx_hotplug_seq = ATOMIC_LONG_INIT(0);
|
||||
|
||||
|
|
@ -4318,6 +4325,11 @@ static void scx_disable_workfn(struct kthread_work *work)
|
|||
scx_dsp_max_batch = 0;
|
||||
free_kick_syncs();
|
||||
|
||||
if (scx_bypassed_for_enable) {
|
||||
scx_bypassed_for_enable = false;
|
||||
scx_bypass(false);
|
||||
}
|
||||
|
||||
mutex_unlock(&scx_enable_mutex);
|
||||
|
||||
WARN_ON_ONCE(scx_set_enable_state(SCX_DISABLED) != SCX_DISABLING);
|
||||
|
|
@ -4970,6 +4982,7 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
|
|||
* Init in bypass mode to guarantee forward progress.
|
||||
*/
|
||||
scx_bypass(true);
|
||||
scx_bypassed_for_enable = true;
|
||||
|
||||
for (i = SCX_OPI_NORMAL_BEGIN; i < SCX_OPI_NORMAL_END; i++)
|
||||
if (((void (**)(void))ops)[i])
|
||||
|
|
@ -5067,6 +5080,7 @@ static int scx_enable(struct sched_ext_ops *ops, struct bpf_link *link)
|
|||
scx_task_iter_stop(&sti);
|
||||
percpu_up_write(&scx_fork_rwsem);
|
||||
|
||||
scx_bypassed_for_enable = false;
|
||||
scx_bypass(false);
|
||||
|
||||
if (!scx_tryset_enable_state(SCX_ENABLED, SCX_ENABLING)) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue