cgroup: Fixes for v7.1-rc4
Two rstat fixes:
- Out-of-bounds access in the css_rstat_updated() BPF kfunc when called
with an unchecked user-supplied cpu.
- Over-strict NMI guard after the recent switch to try_cmpxchg left
sparc and ppc64 unable to queue rstat updates from NMI.
-----BEGIN PGP SIGNATURE-----
iIQEABYKACwWIQTfIjM1kS57o3GsC/uxYfJx3gVYGQUCahCHDA4cdGpAa2VybmVs
Lm9yZwAKCRCxYfJx3gVYGTk2AP9Me+BV0h17oEuaqAii7uzMom6zCYUO6KY6ADAe
zr+zcgEA0B72FxH+GyPwe7lhropwg9WR6jagsCFN/tlMPHwQrwc=
=Sos7
-----END PGP SIGNATURE-----
Merge tag 'cgroup-for-7.1-rc4-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup fixes from Tejun Heo:
"Two rstat fixes:
- Out-of-bounds access in the css_rstat_updated() BPF kfunc when
called with an unchecked user-supplied cpu
- Over-strict NMI guard after the recent switch to try_cmpxchg left
sparc and ppc64 unable to queue rstat updates from NMI"
* tag 'cgroup-for-7.1-rc4-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
cgroup: rstat: relax NMI guard after switch to try_cmpxchg
cgroup/rstat: validate cpu before css_rstat_cpu() access
master
commit
de37e502a3
|
|
@ -2241,7 +2241,7 @@ void blk_cgroup_bio_start(struct bio *bio)
|
||||||
}
|
}
|
||||||
|
|
||||||
u64_stats_update_end_irqrestore(&bis->sync, flags);
|
u64_stats_update_end_irqrestore(&bis->sync, flags);
|
||||||
css_rstat_updated(&blkcg->css, cpu);
|
__css_rstat_updated(&blkcg->css, cpu);
|
||||||
put_cpu();
|
put_cpu();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -777,6 +777,7 @@ static inline void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen)
|
||||||
/*
|
/*
|
||||||
* cgroup scalable recursive statistics.
|
* cgroup scalable recursive statistics.
|
||||||
*/
|
*/
|
||||||
|
void __css_rstat_updated(struct cgroup_subsys_state *css, int cpu);
|
||||||
void css_rstat_updated(struct cgroup_subsys_state *css, int cpu);
|
void css_rstat_updated(struct cgroup_subsys_state *css, int cpu);
|
||||||
void css_rstat_flush(struct cgroup_subsys_state *css);
|
void css_rstat_flush(struct cgroup_subsys_state *css);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0-only
|
// SPDX-License-Identifier: GPL-2.0-only
|
||||||
#include "cgroup-internal.h"
|
#include "cgroup-internal.h"
|
||||||
|
|
||||||
|
#include <linux/cpumask.h>
|
||||||
#include <linux/sched/cputime.h>
|
#include <linux/sched/cputime.h>
|
||||||
|
|
||||||
#include <linux/bpf.h>
|
#include <linux/bpf.h>
|
||||||
|
|
@ -53,7 +54,7 @@ static inline struct llist_head *ss_lhead_cpu(struct cgroup_subsys *ss, int cpu)
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* css_rstat_updated - keep track of updated rstat_cpu
|
* __css_rstat_updated - keep track of updated rstat_cpu
|
||||||
* @css: target cgroup subsystem state
|
* @css: target cgroup subsystem state
|
||||||
* @cpu: cpu on which rstat_cpu was updated
|
* @cpu: cpu on which rstat_cpu was updated
|
||||||
*
|
*
|
||||||
|
|
@ -63,31 +64,27 @@ static inline struct llist_head *ss_lhead_cpu(struct cgroup_subsys *ss, int cpu)
|
||||||
*
|
*
|
||||||
* NOTE: if the user needs the guarantee that the updater either add itself in
|
* NOTE: if the user needs the guarantee that the updater either add itself in
|
||||||
* the lockless list or the concurrent flusher flushes its updated stats, a
|
* the lockless list or the concurrent flusher flushes its updated stats, a
|
||||||
* memory barrier is needed before the call to css_rstat_updated() i.e. a
|
* memory barrier is needed before the call to __css_rstat_updated() i.e. a
|
||||||
* barrier after updating the per-cpu stats and before calling
|
* barrier after updating the per-cpu stats and before calling
|
||||||
* css_rstat_updated().
|
* __css_rstat_updated().
|
||||||
*/
|
*/
|
||||||
__bpf_kfunc void css_rstat_updated(struct cgroup_subsys_state *css, int cpu)
|
void __css_rstat_updated(struct cgroup_subsys_state *css, int cpu)
|
||||||
{
|
{
|
||||||
struct llist_head *lhead;
|
struct llist_head *lhead;
|
||||||
struct css_rstat_cpu *rstatc;
|
struct css_rstat_cpu *rstatc;
|
||||||
struct llist_node *self;
|
struct llist_node *self;
|
||||||
|
|
||||||
/*
|
/* Prevent access to uninitialized rstat pointers. */
|
||||||
* Since bpf programs can call this function, prevent access to
|
|
||||||
* uninitialized rstat pointers.
|
|
||||||
*/
|
|
||||||
if (!css_uses_rstat(css))
|
if (!css_uses_rstat(css))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
lockdep_assert_preemption_disabled();
|
lockdep_assert_preemption_disabled();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For archs without nmi safe cmpxchg or percpu ops support, ignore
|
* The lockless insertion below relies on NMI-safe cmpxchg;
|
||||||
* the requests from nmi context.
|
* bail out in NMI on archs that don't provide it.
|
||||||
*/
|
*/
|
||||||
if ((!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) ||
|
if (!IS_ENABLED(CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG) && in_nmi())
|
||||||
!IS_ENABLED(CONFIG_ARCH_HAS_NMI_SAFE_THIS_CPU_OPS)) && in_nmi())
|
|
||||||
return;
|
return;
|
||||||
|
|
||||||
rstatc = css_rstat_cpu(css, cpu);
|
rstatc = css_rstat_cpu(css, cpu);
|
||||||
|
|
@ -125,6 +122,18 @@ __bpf_kfunc void css_rstat_updated(struct cgroup_subsys_state *css, int cpu)
|
||||||
llist_add(&rstatc->lnode, lhead);
|
llist_add(&rstatc->lnode, lhead);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* BPF-facing wrapper for __css_rstat_updated(). Validate the caller-provided
|
||||||
|
* CPU before passing it to the internal rstat updater.
|
||||||
|
*/
|
||||||
|
__bpf_kfunc void css_rstat_updated(struct cgroup_subsys_state *css, int cpu)
|
||||||
|
{
|
||||||
|
if (unlikely(cpu < 0 || cpu >= nr_cpu_ids || !cpu_possible(cpu)))
|
||||||
|
return;
|
||||||
|
|
||||||
|
__css_rstat_updated(css, cpu);
|
||||||
|
}
|
||||||
|
|
||||||
static void __css_process_update_tree(struct cgroup_subsys_state *css, int cpu)
|
static void __css_process_update_tree(struct cgroup_subsys_state *css, int cpu)
|
||||||
{
|
{
|
||||||
/* put @css and all ancestors on the corresponding updated lists */
|
/* put @css and all ancestors on the corresponding updated lists */
|
||||||
|
|
@ -170,7 +179,7 @@ static void css_process_update_tree(struct cgroup_subsys *ss, int cpu)
|
||||||
* flusher flush the stats updated by the updater who have
|
* flusher flush the stats updated by the updater who have
|
||||||
* observed that they are already on the list. The
|
* observed that they are already on the list. The
|
||||||
* corresponding barrier pair for this one should be before
|
* corresponding barrier pair for this one should be before
|
||||||
* css_rstat_updated() by the user.
|
* __css_rstat_updated() by the user.
|
||||||
*
|
*
|
||||||
* For now, there is no such user, so not adding the
|
* For now, there is no such user, so not adding the
|
||||||
* barrier here but if such a use-case arises, please add
|
* barrier here but if such a use-case arises, please add
|
||||||
|
|
@ -614,7 +623,7 @@ static void cgroup_base_stat_cputime_account_end(struct cgroup *cgrp,
|
||||||
unsigned long flags)
|
unsigned long flags)
|
||||||
{
|
{
|
||||||
u64_stats_update_end_irqrestore(&rstatbc->bsync, flags);
|
u64_stats_update_end_irqrestore(&rstatbc->bsync, flags);
|
||||||
css_rstat_updated(&cgrp->self, smp_processor_id());
|
__css_rstat_updated(&cgrp->self, smp_processor_id());
|
||||||
put_cpu_ptr(rstatbc);
|
put_cpu_ptr(rstatbc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -679,7 +679,7 @@ static inline void memcg_rstat_updated(struct mem_cgroup *memcg, long val,
|
||||||
if (!val)
|
if (!val)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
css_rstat_updated(&memcg->css, cpu);
|
__css_rstat_updated(&memcg->css, cpu);
|
||||||
statc_pcpu = memcg->vmstats_percpu;
|
statc_pcpu = memcg->vmstats_percpu;
|
||||||
for (; statc_pcpu; statc_pcpu = statc->parent_pcpu) {
|
for (; statc_pcpu; statc_pcpu = statc->parent_pcpu) {
|
||||||
statc = this_cpu_ptr(statc_pcpu);
|
statc = this_cpu_ptr(statc_pcpu);
|
||||||
|
|
@ -2796,7 +2796,7 @@ static inline void account_slab_nmi_safe(struct mem_cgroup *memcg,
|
||||||
struct mem_cgroup_per_node *pn = memcg->nodeinfo[pgdat->node_id];
|
struct mem_cgroup_per_node *pn = memcg->nodeinfo[pgdat->node_id];
|
||||||
|
|
||||||
/* preemption is disabled in_nmi(). */
|
/* preemption is disabled in_nmi(). */
|
||||||
css_rstat_updated(&memcg->css, smp_processor_id());
|
__css_rstat_updated(&memcg->css, smp_processor_id());
|
||||||
if (idx == NR_SLAB_RECLAIMABLE_B)
|
if (idx == NR_SLAB_RECLAIMABLE_B)
|
||||||
atomic_add(nr, &pn->slab_reclaimable);
|
atomic_add(nr, &pn->slab_reclaimable);
|
||||||
else
|
else
|
||||||
|
|
@ -3019,7 +3019,7 @@ static inline void account_kmem_nmi_safe(struct mem_cgroup *memcg, int val)
|
||||||
mod_memcg_state(memcg, MEMCG_KMEM, val);
|
mod_memcg_state(memcg, MEMCG_KMEM, val);
|
||||||
} else {
|
} else {
|
||||||
/* preemption is disabled in_nmi(). */
|
/* preemption is disabled in_nmi(). */
|
||||||
css_rstat_updated(&memcg->css, smp_processor_id());
|
__css_rstat_updated(&memcg->css, smp_processor_id());
|
||||||
atomic_add(val, &memcg->kmem_stat);
|
atomic_add(val, &memcg->kmem_stat);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue