From 4717432dfd99bbd015b6782adca216c6f9340038 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 9 Aug 2025 21:04:19 +0800 Subject: [PATCH 1/4] sched/deadline: Fix dl_server_stopped() Commit cccb45d7c429 ("sched/deadline: Less agressive dl_server handling") introduces dl_server_stopped(). But it is obvious that dl_server_stopped() should return true if dl_se->dl_server_active is 0. Fixes: cccb45d7c429 ("sched/deadline: Less agressive dl_server handling") Signed-off-by: Huacai Chen Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20250809130419.1980742-1-chenhuacai@loongson.cn --- kernel/sched/deadline.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index e2d51f4306b3..bb813afe5b08 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1611,7 +1611,7 @@ void dl_server_stop(struct sched_dl_entity *dl_se) static bool dl_server_stopped(struct sched_dl_entity *dl_se) { if (!dl_se->dl_server_active) - return false; + return true; if (dl_se->dl_server_idle) { dl_server_stop(dl_se); From bb4700adc3abec34c0a38b64f66258e4e233fc16 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Mon, 21 Jul 2025 15:01:42 +0200 Subject: [PATCH 2/4] sched/deadline: Always stop dl-server before changing parameters Commit cccb45d7c4295 ("sched/deadline: Less agressive dl_server handling") reduced dl-server overhead by delaying disabling servers only after there are no fair task around for a whole period, which means that deadline entities are not dequeued right away on a server stop event. However, the delay opens up a window in which a request for changing server parameters can break per-runqueue running_bw tracking, as reported by Yuri. Close the problematic window by unconditionally calling dl_server_stop() before applying the new parameters (ensuring deadline entities go through an actual dequeue). Fixes: cccb45d7c4295 ("sched/deadline: Less agressive dl_server handling") Reported-by: Yuri Andriaccio Signed-off-by: Juri Lelli Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Valentin Schneider Link: https://lore.kernel.org/r/20250721-upstream-fix-dlserver-lessaggressive-b4-v1-1-4ebc10c87e40@redhat.com --- kernel/sched/debug.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 3f06ab84d53f..02e16b70a790 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -376,10 +376,8 @@ static ssize_t sched_fair_server_write(struct file *filp, const char __user *ubu return -EINVAL; } - if (rq->cfs.h_nr_queued) { - update_rq_clock(rq); - dl_server_stop(&rq->fair_server); - } + update_rq_clock(rq); + dl_server_stop(&rq->fair_server); retval = dl_server_apply_params(&rq->fair_server, runtime, period, 0); if (retval) From 421fc59cf58c64f898cafbbbbda0bc705837e7df Mon Sep 17 00:00:00 2001 From: kuyo chang Date: Sun, 15 Jun 2025 21:10:56 +0800 Subject: [PATCH 3/4] sched/deadline: Fix RT task potential starvation when expiry time passed [Symptom] The fair server mechanism, which is intended to prevent fair starvation when higher-priority tasks monopolize the CPU. Specifically, RT tasks on the runqueue may not be scheduled as expected. [Analysis] The log "sched: DL replenish lagged too much" triggered. By memory dump of dl_server: curr = 0xFFFFFF80D6A0AC00 ( dl_server = 0xFFFFFF83CD5B1470( dl_runtime = 0x02FAF080, dl_deadline = 0x3B9ACA00, dl_period = 0x3B9ACA00, dl_bw = 0xCCCC, dl_density = 0xCCCC, runtime = 0x02FAF080, deadline = 0x0000082031EB0E80, flags = 0x0, dl_throttled = 0x0, dl_yielded = 0x0, dl_non_contending = 0x0, dl_overrun = 0x0, dl_server = 0x1, dl_server_active = 0x1, dl_defer = 0x1, dl_defer_armed = 0x0, dl_defer_running = 0x1, dl_timer = ( node = ( expires = 0x000008199756E700), _softexpires = 0x000008199756E700, function = 0xFFFFFFDB9AF44D30 = dl_task_timer, base = 0xFFFFFF83CD5A12C0, state = 0x0, is_rel = 0x0, is_soft = 0x0, clock_update_flags = 0x4, clock = 0x000008204A496900, - The timer expiration time (rq->curr->dl_server->dl_timer->expires) is already in the past, indicating the timer has expired. - The timer state (rq->curr->dl_server->dl_timer->state) is 0. [Suspected Root Cause] The relevant code flow in the throttle path of update_curr_dl_se() as follows: dequeue_dl_entity(dl_se, 0); // the DL entity is dequeued if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(dl_se))) { if (dl_server(dl_se)) // timer registration fails enqueue_dl_entity(dl_se, ENQUEUE_REPLENISH);//enqueue immediately ... } The failure of `start_dl_timer` is caused by attempting to register a timer with an expiration time that is already in the past. When this situation persists, the code repeatedly re-enqueues the DL entity without properly replenishing or restarting the timer, resulting in RT task may not be scheduled as expected. [Proposed Solution]: Instead of immediately re-enqueuing the DL entity on timer registration failure, this change ensures the DL entity is properly replenished and the timer is restarted, preventing RT potential starvation. Fixes: 63ba8422f876 ("sched/deadline: Introduce deadline servers") Signed-off-by: kuyo chang Signed-off-by: Peter Zijlstra (Intel) Closes: https://lore.kernel.org/CAMuHMdXn4z1pioTtBGMfQM0jsLviqS2jwysaWXpoLxWYoGa82w@mail.gmail.com Tested-by: Geert Uytterhoeven Tested-by: Jiri Slaby Tested-by: Diederik de Haas Link: https://lkml.kernel.org/r/20250615131129.954975-1-kuyo.chang@mediatek.com --- kernel/sched/deadline.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index bb813afe5b08..88c3bd64a8a0 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1496,10 +1496,12 @@ throttle: } if (unlikely(is_dl_boosted(dl_se) || !start_dl_timer(dl_se))) { - if (dl_server(dl_se)) - enqueue_dl_entity(dl_se, ENQUEUE_REPLENISH); - else + if (dl_server(dl_se)) { + replenish_dl_new_period(dl_se, rq); + start_dl_timer(dl_se); + } else { enqueue_task_dl(rq, dl_task_of(dl_se), ENQUEUE_REPLENISH); + } } if (!is_leftmost(dl_se, &rq->dl)) From 52d15521eb75f9b521744db675bee61025d2fa52 Mon Sep 17 00:00:00 2001 From: Yicong Yang Date: Fri, 27 Jun 2025 11:54:20 +0800 Subject: [PATCH 4/4] sched/deadline: Don't count nr_running for dl_server proxy tasks On CPU offline the kernel stalled with below call trace: INFO: task kworker/0:1:11 blocked for more than 120 seconds. cpuhp hold the cpu hotplug lock endless and stalled vmstat_shepherd. This is because we count nr_running twice on cpuhp enqueuing and failed the wait condition of cpuhp: enqueue_task_fair() // pick cpuhp from idle, rq->nr_running = 0 dl_server_start() [...] add_nr_running() // rq->nr_running = 1 add_nr_running() // rq->nr_running = 2 [switch to cpuhp, waiting on balance_hotplug_wait()] rcuwait_wait_event(rq->nr_running == 1 && ...) // failed, rq->nr_running=2 schedule() // wait again It doesn't make sense to count the dl_server towards runnable tasks, since it runs other tasks. Fixes: 63ba8422f876 ("sched/deadline: Introduce deadline servers") Signed-off-by: Yicong Yang Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/r/20250627035420.37712-1-yangyicong@huawei.com --- kernel/sched/deadline.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 88c3bd64a8a0..f25301267e47 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1851,7 +1851,9 @@ void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) u64 deadline = dl_se->deadline; dl_rq->dl_nr_running++; - add_nr_running(rq_of_dl_rq(dl_rq), 1); + + if (!dl_server(dl_se)) + add_nr_running(rq_of_dl_rq(dl_rq), 1); inc_dl_deadline(dl_rq, deadline); } @@ -1861,7 +1863,9 @@ void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq) { WARN_ON(!dl_rq->dl_nr_running); dl_rq->dl_nr_running--; - sub_nr_running(rq_of_dl_rq(dl_rq), 1); + + if (!dl_server(dl_se)) + sub_nr_running(rq_of_dl_rq(dl_rq), 1); dec_dl_deadline(dl_rq, dl_se->deadline); }