codel: annotate data-races in codel_dump_stats()
codel_dump_stats() only runs with RTNL held,
reading fields that can be changed in qdisc fast path.
Add READ_ONCE()/WRITE_ONCE() annotations.
Alternative would be to acquire the qdisc spinlock, but our long-term
goal is to make qdisc dump operations lockless as much as we can.
tc_codel_xstats fields don't need to be latched atomically,
otherwise this bug would have been caught earlier.
No change in kernel size:
$ scripts/bloat-o-meter -t vmlinux.0 vmlinux
add/remove: 0/0 grow/shrink: 1/1 up/down: 3/-1 (2)
Function old new delta
codel_qdisc_dequeue 2462 2465 +3
codel_dump_stats 250 249 -1
Total: Before=29739919, After=29739921, chg +0.00%
Fixes: 76e3cc126b ("codel: Controlled Delay AQM")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20260407143053.1570620-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
master
parent
dbc2bb4e87
commit
ea25e03da7
|
|
@ -120,10 +120,10 @@ static bool codel_should_drop(const struct sk_buff *skb,
|
||||||
}
|
}
|
||||||
|
|
||||||
skb_len = skb_len_func(skb);
|
skb_len = skb_len_func(skb);
|
||||||
vars->ldelay = now - skb_time_func(skb);
|
WRITE_ONCE(vars->ldelay, now - skb_time_func(skb));
|
||||||
|
|
||||||
if (unlikely(skb_len > stats->maxpacket))
|
if (unlikely(skb_len > stats->maxpacket))
|
||||||
stats->maxpacket = skb_len;
|
WRITE_ONCE(stats->maxpacket, skb_len);
|
||||||
|
|
||||||
if (codel_time_before(vars->ldelay, params->target) ||
|
if (codel_time_before(vars->ldelay, params->target) ||
|
||||||
*backlog <= params->mtu) {
|
*backlog <= params->mtu) {
|
||||||
|
|
@ -159,7 +159,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
|
||||||
|
|
||||||
if (!skb) {
|
if (!skb) {
|
||||||
vars->first_above_time = 0;
|
vars->first_above_time = 0;
|
||||||
vars->dropping = false;
|
WRITE_ONCE(vars->dropping, false);
|
||||||
return skb;
|
return skb;
|
||||||
}
|
}
|
||||||
now = codel_get_time();
|
now = codel_get_time();
|
||||||
|
|
@ -168,7 +168,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
|
||||||
if (vars->dropping) {
|
if (vars->dropping) {
|
||||||
if (!drop) {
|
if (!drop) {
|
||||||
/* sojourn time below target - leave dropping state */
|
/* sojourn time below target - leave dropping state */
|
||||||
vars->dropping = false;
|
WRITE_ONCE(vars->dropping, false);
|
||||||
} else if (codel_time_after_eq(now, vars->drop_next)) {
|
} else if (codel_time_after_eq(now, vars->drop_next)) {
|
||||||
/* It's time for the next drop. Drop the current
|
/* It's time for the next drop. Drop the current
|
||||||
* packet and dequeue the next. The dequeue might
|
* packet and dequeue the next. The dequeue might
|
||||||
|
|
@ -180,16 +180,18 @@ static struct sk_buff *codel_dequeue(void *ctx,
|
||||||
*/
|
*/
|
||||||
while (vars->dropping &&
|
while (vars->dropping &&
|
||||||
codel_time_after_eq(now, vars->drop_next)) {
|
codel_time_after_eq(now, vars->drop_next)) {
|
||||||
vars->count++; /* dont care of possible wrap
|
/* dont care of possible wrap
|
||||||
* since there is no more divide
|
* since there is no more divide.
|
||||||
*/
|
*/
|
||||||
|
WRITE_ONCE(vars->count, vars->count + 1);
|
||||||
codel_Newton_step(vars);
|
codel_Newton_step(vars);
|
||||||
if (params->ecn && INET_ECN_set_ce(skb)) {
|
if (params->ecn && INET_ECN_set_ce(skb)) {
|
||||||
stats->ecn_mark++;
|
WRITE_ONCE(stats->ecn_mark,
|
||||||
vars->drop_next =
|
stats->ecn_mark + 1);
|
||||||
|
WRITE_ONCE(vars->drop_next,
|
||||||
codel_control_law(vars->drop_next,
|
codel_control_law(vars->drop_next,
|
||||||
params->interval,
|
params->interval,
|
||||||
vars->rec_inv_sqrt);
|
vars->rec_inv_sqrt));
|
||||||
goto end;
|
goto end;
|
||||||
}
|
}
|
||||||
stats->drop_len += skb_len_func(skb);
|
stats->drop_len += skb_len_func(skb);
|
||||||
|
|
@ -202,13 +204,13 @@ static struct sk_buff *codel_dequeue(void *ctx,
|
||||||
skb_time_func,
|
skb_time_func,
|
||||||
backlog, now)) {
|
backlog, now)) {
|
||||||
/* leave dropping state */
|
/* leave dropping state */
|
||||||
vars->dropping = false;
|
WRITE_ONCE(vars->dropping, false);
|
||||||
} else {
|
} else {
|
||||||
/* and schedule the next drop */
|
/* and schedule the next drop */
|
||||||
vars->drop_next =
|
WRITE_ONCE(vars->drop_next,
|
||||||
codel_control_law(vars->drop_next,
|
codel_control_law(vars->drop_next,
|
||||||
params->interval,
|
params->interval,
|
||||||
vars->rec_inv_sqrt);
|
vars->rec_inv_sqrt));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -216,7 +218,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
|
||||||
u32 delta;
|
u32 delta;
|
||||||
|
|
||||||
if (params->ecn && INET_ECN_set_ce(skb)) {
|
if (params->ecn && INET_ECN_set_ce(skb)) {
|
||||||
stats->ecn_mark++;
|
WRITE_ONCE(stats->ecn_mark, stats->ecn_mark + 1);
|
||||||
} else {
|
} else {
|
||||||
stats->drop_len += skb_len_func(skb);
|
stats->drop_len += skb_len_func(skb);
|
||||||
drop_func(skb, ctx);
|
drop_func(skb, ctx);
|
||||||
|
|
@ -227,7 +229,7 @@ static struct sk_buff *codel_dequeue(void *ctx,
|
||||||
stats, skb_len_func,
|
stats, skb_len_func,
|
||||||
skb_time_func, backlog, now);
|
skb_time_func, backlog, now);
|
||||||
}
|
}
|
||||||
vars->dropping = true;
|
WRITE_ONCE(vars->dropping, true);
|
||||||
/* if min went above target close to when we last went below it
|
/* if min went above target close to when we last went below it
|
||||||
* assume that the drop rate that controlled the queue on the
|
* assume that the drop rate that controlled the queue on the
|
||||||
* last cycle is a good starting point to control it now.
|
* last cycle is a good starting point to control it now.
|
||||||
|
|
@ -236,19 +238,20 @@ static struct sk_buff *codel_dequeue(void *ctx,
|
||||||
if (delta > 1 &&
|
if (delta > 1 &&
|
||||||
codel_time_before(now - vars->drop_next,
|
codel_time_before(now - vars->drop_next,
|
||||||
16 * params->interval)) {
|
16 * params->interval)) {
|
||||||
vars->count = delta;
|
WRITE_ONCE(vars->count, delta);
|
||||||
/* we dont care if rec_inv_sqrt approximation
|
/* we dont care if rec_inv_sqrt approximation
|
||||||
* is not very precise :
|
* is not very precise :
|
||||||
* Next Newton steps will correct it quadratically.
|
* Next Newton steps will correct it quadratically.
|
||||||
*/
|
*/
|
||||||
codel_Newton_step(vars);
|
codel_Newton_step(vars);
|
||||||
} else {
|
} else {
|
||||||
vars->count = 1;
|
WRITE_ONCE(vars->count, 1);
|
||||||
vars->rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT;
|
vars->rec_inv_sqrt = ~0U >> REC_INV_SQRT_SHIFT;
|
||||||
}
|
}
|
||||||
vars->lastcount = vars->count;
|
WRITE_ONCE(vars->lastcount, vars->count);
|
||||||
vars->drop_next = codel_control_law(now, params->interval,
|
WRITE_ONCE(vars->drop_next,
|
||||||
vars->rec_inv_sqrt);
|
codel_control_law(now, params->interval,
|
||||||
|
vars->rec_inv_sqrt));
|
||||||
}
|
}
|
||||||
end:
|
end:
|
||||||
if (skb && codel_time_after(vars->ldelay, params->ce_threshold)) {
|
if (skb && codel_time_after(vars->ldelay, params->ce_threshold)) {
|
||||||
|
|
@ -262,7 +265,7 @@ end:
|
||||||
params->ce_threshold_selector));
|
params->ce_threshold_selector));
|
||||||
}
|
}
|
||||||
if (set_ce && INET_ECN_set_ce(skb))
|
if (set_ce && INET_ECN_set_ce(skb))
|
||||||
stats->ce_mark++;
|
WRITE_ONCE(stats->ce_mark, stats->ce_mark + 1);
|
||||||
}
|
}
|
||||||
return skb;
|
return skb;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -85,7 +85,7 @@ static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
|
||||||
return qdisc_enqueue_tail(skb, sch);
|
return qdisc_enqueue_tail(skb, sch);
|
||||||
}
|
}
|
||||||
q = qdisc_priv(sch);
|
q = qdisc_priv(sch);
|
||||||
q->drop_overlimit++;
|
WRITE_ONCE(q->drop_overlimit, q->drop_overlimit + 1);
|
||||||
return qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_OVERLIMIT);
|
return qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_OVERLIMIT);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -221,18 +221,18 @@ static int codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
|
||||||
{
|
{
|
||||||
const struct codel_sched_data *q = qdisc_priv(sch);
|
const struct codel_sched_data *q = qdisc_priv(sch);
|
||||||
struct tc_codel_xstats st = {
|
struct tc_codel_xstats st = {
|
||||||
.maxpacket = q->stats.maxpacket,
|
.maxpacket = READ_ONCE(q->stats.maxpacket),
|
||||||
.count = q->vars.count,
|
.count = READ_ONCE(q->vars.count),
|
||||||
.lastcount = q->vars.lastcount,
|
.lastcount = READ_ONCE(q->vars.lastcount),
|
||||||
.drop_overlimit = q->drop_overlimit,
|
.drop_overlimit = READ_ONCE(q->drop_overlimit),
|
||||||
.ldelay = codel_time_to_us(q->vars.ldelay),
|
.ldelay = codel_time_to_us(READ_ONCE(q->vars.ldelay)),
|
||||||
.dropping = q->vars.dropping,
|
.dropping = READ_ONCE(q->vars.dropping),
|
||||||
.ecn_mark = q->stats.ecn_mark,
|
.ecn_mark = READ_ONCE(q->stats.ecn_mark),
|
||||||
.ce_mark = q->stats.ce_mark,
|
.ce_mark = READ_ONCE(q->stats.ce_mark),
|
||||||
};
|
};
|
||||||
|
|
||||||
if (q->vars.dropping) {
|
if (st.dropping) {
|
||||||
codel_tdiff_t delta = q->vars.drop_next - codel_get_time();
|
codel_tdiff_t delta = READ_ONCE(q->vars.drop_next) - codel_get_time();
|
||||||
|
|
||||||
if (delta >= 0)
|
if (delta >= 0)
|
||||||
st.drop_next = codel_time_to_us(delta);
|
st.drop_next = codel_time_to_us(delta);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue