sched/isolation: Flush memcg workqueues on cpuset isolated partition change

The HK_TYPE_DOMAIN housekeeping cpumask is now modifiable at runtime. In
order to synchronize against the memcg workqueue and make sure that no
asynchronous draining is still pending or executing on a newly made
isolated CPU, the housekeeping subsystem must flush the memcg
workqueues.

However the memcg workqueues can't be flushed easily since they are
queued to the main per-CPU workqueue pool.

Solve this by creating a memcg specific pool, and by providing and using
the appropriate flushing API.

Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Marco Crivellari <marco.crivellari@suse.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <muchun.song@linux.dev>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Shakeel Butt <shakeel.butt@linux.dev>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Waiman Long <longman@redhat.com>
Cc: cgroups@vger.kernel.org
Cc: linux-mm@kvack.org
master
Frederic Weisbecker 2025-06-12 15:36:16 +02:00
parent 03ff735101
commit b7eb4edcc3
4 changed files with 18 additions and 1 deletions

View File

@ -1037,6 +1037,8 @@ static inline u64 cgroup_id_from_mm(struct mm_struct *mm)
return id;
}
void mem_cgroup_flush_workqueue(void);
extern int mem_cgroup_init(void);
#else /* CONFIG_MEMCG */
@ -1436,6 +1438,8 @@ static inline u64 cgroup_id_from_mm(struct mm_struct *mm)
return 0;
}
/* No-op stub used when CONFIG_MEMCG is disabled. */
static inline void mem_cgroup_flush_workqueue(void) { }
static inline int mem_cgroup_init(void) { return 0; }
#endif /* CONFIG_MEMCG */

View File

@ -144,6 +144,8 @@ int housekeeping_update(struct cpumask *isol_mask)
synchronize_rcu();
mem_cgroup_flush_workqueue();
kfree(old);
return 0;

View File

@ -44,6 +44,7 @@
#include <linux/lockdep_api.h>
#include <linux/lockdep.h>
#include <linux/memblock.h>
#include <linux/memcontrol.h>
#include <linux/minmax.h>
#include <linux/mm.h>
#include <linux/module.h>

View File

@ -96,6 +96,8 @@ static bool cgroup_memory_nokmem __ro_after_init;
/* BPF memory accounting disabled? */
static bool cgroup_memory_nobpf __ro_after_init;
static struct workqueue_struct *memcg_wq __ro_after_init;
static struct kmem_cache *memcg_cachep;
static struct kmem_cache *memcg_pn_cachep;
@ -2013,7 +2015,7 @@ static void schedule_drain_work(int cpu, struct work_struct *work)
*/
guard(rcu)();
if (!cpu_is_isolated(cpu))
schedule_work_on(cpu, work);
queue_work_on(cpu, memcg_wq, work);
}
/*
@ -5125,6 +5127,11 @@ void mem_cgroup_sk_uncharge(const struct sock *sk, unsigned int nr_pages)
refill_stock(memcg, nr_pages);
}
/*
 * Flush the memcg-specific workqueue (memcg_wq).
 *
 * Called from housekeeping_update() after synchronize_rcu(), so that no
 * asynchronous memcg stock draining is still pending or executing on a
 * newly isolated CPU once the housekeeping cpumask has been updated.
 */
void mem_cgroup_flush_workqueue(void)
{
flush_workqueue(memcg_wq);
}
static int __init cgroup_memory(char *s)
{
char *token;
@ -5167,6 +5174,9 @@ int __init mem_cgroup_init(void)
cpuhp_setup_state_nocalls(CPUHP_MM_MEMCQ_DEAD, "mm/memctrl:dead", NULL,
memcg_hotplug_cpu_dead);
memcg_wq = alloc_workqueue("memcg", WQ_PERCPU, 0);
WARN_ON(!memcg_wq);
for_each_possible_cpu(cpu) {
INIT_WORK(&per_cpu_ptr(&memcg_stock, cpu)->work,
drain_local_memcg_stock);