codel: annotate data-races in codel_dump_stats()

codel_dump_stats() only runs with RTNL held,
reading fields that can be changed in qdisc fast path.

Add READ_ONCE()/WRITE_ONCE() annotations.

Alternative would be to acquire the qdisc spinlock, but our long-term
goal is to make qdisc dump operations lockless as much as we can.

tc_codel_xstats fields don't need to be latched atomically,
otherwise this bug would have been caught earlier.

No change in kernel size:

$ scripts/bloat-o-meter -t vmlinux.0 vmlinux
add/remove: 0/0 grow/shrink: 1/1 up/down: 3/-1 (2)
Function                                     old     new   delta
codel_qdisc_dequeue                         2462    2465      +3
codel_dump_stats                             250     249      -1
Total: Before=29739919, After=29739921, chg +0.00%

Fixes: 76e3cc126b ("codel: Controlled Delay AQM")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260407143053.1570620-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
master
Eric Dumazet 2026-04-07 14:30:53 +00:00 committed by Jakub Kicinski
parent dbc2bb4e87
commit ea25e03da7
2 changed files with 35 additions and 32 deletions

View File

@ -120,10 +120,10 @@ static bool codel_should_drop(const struct sk_buff *skb,
} }
skb_len = skb_len_func(skb); skb_len = skb_len_func(skb);
vars->ldelay = now - skb_time_func(skb); WRITE_ONCE(vars->ldelay, now - skb_time_func(skb));
if (unlikely(skb_len > stats->maxpacket)) if (unlikely(skb_len > stats->maxpacket))
stats->maxpacket = skb_len; WRITE_ONCE(stats->maxpacket, skb_len);
if (codel_time_before(vars->ldelay, params->target) || if (codel_time_before(vars->ldelay, params->target) ||
*backlog <= params->mtu) { *backlog <= params->mtu) {
@ -159,7 +159,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
if (!skb) { if (!skb) {
vars->first_above_time = 0; vars->first_above_time = 0;
vars->dropping = false; WRITE_ONCE(vars->dropping, false);
return skb; return skb;
} }
now = codel_get_time(); now = codel_get_time();
@ -168,7 +168,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
if (vars->dropping) { if (vars->dropping) {
if (!drop) { if (!drop) {
/* sojourn time below target - leave dropping state */ /* sojourn time below target - leave dropping state */
vars->dropping = false; WRITE_ONCE(vars->dropping, false);
} else if (codel_time_after_eq(now, vars->drop_next)) { } else if (codel_time_after_eq(now, vars->drop_next)) {
/* It's time for the next drop. Drop the current /* It's time for the next drop. Drop the current
* packet and dequeue the next. The dequeue might * packet and dequeue the next. The dequeue might
@ -180,16 +180,18 @@ static struct sk_buff *codel_dequeue(void *ctx,
*/ */
while (vars->dropping && while (vars->dropping &&
codel_time_after_eq(now, vars->drop_next)) { codel_time_after_eq(now, vars->drop_next)) {
vars->count++; /* dont care of possible wrap /* dont care of possible wrap
* since there is no more divide * since there is no more divide.
*/ */
WRITE_ONCE(vars->count, vars->count + 1);
codel_Newton_step(vars); codel_Newton_step(vars);
if (params->ecn && INET_ECN_set_ce(skb)) { if (params->ecn && INET_ECN_set_ce(skb)) {
stats->ecn_mark++; WRITE_ONCE(stats->ecn_mark,
vars->drop_next = stats->ecn_mark + 1);
WRITE_ONCE(vars->drop_next,
codel_control_law(vars->drop_next, codel_control_law(vars->drop_next,
params->interval, params->interval,
vars->rec_inv_sqrt); vars->rec_inv_sqrt));
goto end; goto end;
} }
stats->drop_len += skb_len_func(skb); stats->drop_len += skb_len_func(skb);
@ -202,13 +204,13 @@ static struct sk_buff *codel_dequeue(void *ctx,
skb_time_func, skb_time_func,
backlog, now)) { backlog, now)) {
/* leave dropping state */ /* leave dropping state */
vars->dropping = false; WRITE_ONCE(vars->dropping, false);
} else { } else {
/* and schedule the next drop */ /* and schedule the next drop */
vars->drop_next = WRITE_ONCE(vars->drop_next,
codel_control_law(vars->drop_next, codel_control_law(vars->drop_next,
params->interval, params->interval,
vars->rec_inv_sqrt); vars->rec_inv_sqrt));
} }
} }
} }
@ -216,7 +218,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
u32 delta; u32 delta;
if (params->ecn && INET_ECN_set_ce(skb)) { if (params->ecn && INET_ECN_set_ce(skb)) {
stats->ecn_mark++; WRITE_ONCE(stats->ecn_mark, stats->ecn_mark + 1);
} else { } else {
stats->drop_len += skb_len_func(skb); stats->drop_len += skb_len_func(skb);
drop_func(skb, ctx); drop_func(skb, ctx);
@ -227,7 +229,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
stats, skb_len_func, stats, skb_len_func,
skb_time_func, backlog, now); skb_time_func, backlog, now);
} }
vars->dropping = true; WRITE_ONCE(vars->dropping, true);
/* if min went above target close to when we last went below it /* if min went above target close to when we last went below it
* assume that the drop rate that controlled the queue on the * assume that the drop rate that controlled the queue on the
* last cycle is a good starting point to control it now. * last cycle is a good starting point to control it now.
@ -236,19 +238,20 @@ static struct sk_buff *codel_dequeue(void *ctx,
if (delta > 1 && if (delta > 1 &&
codel_time_before(now - vars->drop_next, codel_time_before(now - vars->drop_next,
16 * params->interval)) { 16 * params->interval)) {
vars->count = delta; WRITE_ONCE(vars->count, delta);
/* we dont care if rec_inv_sqrt approximation /* we dont care if rec_inv_sqrt approximation
* is not very precise : * is not very precise :
* Next Newton steps will correct it quadratically. * Next Newton steps will correct it quadratically.
*/ */
codel_Newton_step(vars); codel_Newton_step(vars);
} else { } else {
vars->count = 1; WRITE_ONCE(vars->count, 1);
vars->rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT; vars->rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT;
} }
vars->lastcount = vars->count; WRITE_ONCE(vars->lastcount, vars->count);
vars->drop_next = codel_control_law(now, params->interval, WRITE_ONCE(vars->drop_next,
vars->rec_inv_sqrt); codel_control_law(now, params->interval,
vars->rec_inv_sqrt));
} }
end: end:
if (skb && codel_time_after(vars->ldelay, params->ce_threshold)) { if (skb && codel_time_after(vars->ldelay, params->ce_threshold)) {
@ -262,7 +265,7 @@ end:
params->ce_threshold_selector)); params->ce_threshold_selector));
} }
if (set_ce && INET_ECN_set_ce(skb)) if (set_ce && INET_ECN_set_ce(skb))
stats->ce_mark++; WRITE_ONCE(stats->ce_mark, stats->ce_mark + 1);
} }
return skb; return skb;
} }

View File

@ -85,7 +85,7 @@ static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
return qdisc_enqueue_tail(skb, sch); return qdisc_enqueue_tail(skb, sch);
} }
q = qdisc_priv(sch); q = qdisc_priv(sch);
q->drop_overlimit++; WRITE_ONCE(q->drop_overlimit, q->drop_overlimit + 1);
return qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_OVERLIMIT); return qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_OVERLIMIT);
} }
@ -221,18 +221,18 @@ static int codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
{ {
const struct codel_sched_data *q = qdisc_priv(sch); const struct codel_sched_data *q = qdisc_priv(sch);
struct tc_codel_xstats st = { struct tc_codel_xstats st = {
.maxpacket = q->stats.maxpacket, .maxpacket = READ_ONCE(q->stats.maxpacket),
.count = q->vars.count, .count = READ_ONCE(q->vars.count),
.lastcount = q->vars.lastcount, .lastcount = READ_ONCE(q->vars.lastcount),
.drop_overlimit = q->drop_overlimit, .drop_overlimit = READ_ONCE(q->drop_overlimit),
.ldelay = codel_time_to_us(q->vars.ldelay), .ldelay = codel_time_to_us(READ_ONCE(q->vars.ldelay)),
.dropping = q->vars.dropping, .dropping = READ_ONCE(q->vars.dropping),
.ecn_mark = q->stats.ecn_mark, .ecn_mark = READ_ONCE(q->stats.ecn_mark),
.ce_mark = q->stats.ce_mark, .ce_mark = READ_ONCE(q->stats.ce_mark),
}; };
if (q->vars.dropping) { if (st.dropping) {
codel_tdiff_t delta = q->vars.drop_next - codel_get_time(); codel_tdiff_t delta = READ_ONCE(q->vars.drop_next) - codel_get_time();
if (delta >= 0) if (delta >= 0)
st.drop_next = codel_time_to_us(delta); st.drop_next = codel_time_to_us(delta);