netfilter: nf_conncount: rework API to use sk_buff directly

When using nf_conncount infrastructure for non-confirmed connections a
duplicated track is possible due to an optimization introduced since
commit d265929930 ("netfilter: nf_conncount: reduce unnecessary GC").

In order to fix this introduce a new conncount API that receives
directly an sk_buff struct.  It fetches the tuple and zone and the
corresponding ct from it. It comes with both existing conncount variants
nf_conncount_count_skb() and nf_conncount_add_skb(). In addition remove
the old API and adjust all the users to use the new one.

This way, for each sk_buff struct it is possible to check if there is a
ct present and already confirmed. If so, skip the add operation.

Fixes: d265929930 ("netfilter: nf_conncount: reduce unnecessary GC")
Signed-off-by: Fernando Fernandez Mancera <fmancera@suse.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
pull/1354/merge
Fernando Fernandez Mancera 2025-11-21 01:14:30 +01:00 committed by Pablo Neira Ayuso
parent fe8313316e
commit be102eb6a0
5 changed files with 145 additions and 106 deletions

View File

@ -18,15 +18,14 @@ struct nf_conncount_list {
struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int keylen); struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int keylen);
void nf_conncount_destroy(struct net *net, struct nf_conncount_data *data); void nf_conncount_destroy(struct net *net, struct nf_conncount_data *data);
unsigned int nf_conncount_count(struct net *net, unsigned int nf_conncount_count_skb(struct net *net,
const struct sk_buff *skb,
u16 l3num,
struct nf_conncount_data *data, struct nf_conncount_data *data,
const u32 *key, const u32 *key);
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone);
int nf_conncount_add(struct net *net, struct nf_conncount_list *list, int nf_conncount_add_skb(struct net *net, const struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple, u16 l3num, struct nf_conncount_list *list);
const struct nf_conntrack_zone *zone);
void nf_conncount_list_init(struct nf_conncount_list *list); void nf_conncount_list_init(struct nf_conncount_list *list);

View File

@ -122,15 +122,65 @@ find_or_evict(struct net *net, struct nf_conncount_list *list,
return ERR_PTR(-EAGAIN); return ERR_PTR(-EAGAIN);
} }
static int __nf_conncount_add(struct net *net, static bool get_ct_or_tuple_from_skb(struct net *net,
struct nf_conncount_list *list, const struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple, u16 l3num,
const struct nf_conntrack_zone *zone) struct nf_conn **ct,
struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone **zone,
bool *refcounted)
{ {
const struct nf_conntrack_tuple_hash *h;
enum ip_conntrack_info ctinfo;
struct nf_conn *found_ct;
found_ct = nf_ct_get(skb, &ctinfo);
if (found_ct && !nf_ct_is_template(found_ct)) {
*tuple = found_ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
*zone = nf_ct_zone(found_ct);
*ct = found_ct;
return true;
}
if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), l3num, net, tuple))
return false;
if (found_ct)
*zone = nf_ct_zone(found_ct);
h = nf_conntrack_find_get(net, *zone, tuple);
if (!h)
return true;
found_ct = nf_ct_tuplehash_to_ctrack(h);
*refcounted = true;
*ct = found_ct;
return true;
}
static int __nf_conncount_add(struct net *net,
const struct sk_buff *skb,
u16 l3num,
struct nf_conncount_list *list)
{
const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
const struct nf_conntrack_tuple_hash *found; const struct nf_conntrack_tuple_hash *found;
struct nf_conncount_tuple *conn, *conn_n; struct nf_conncount_tuple *conn, *conn_n;
struct nf_conntrack_tuple tuple;
struct nf_conn *ct = NULL;
struct nf_conn *found_ct; struct nf_conn *found_ct;
unsigned int collect = 0; unsigned int collect = 0;
bool refcounted = false;
if (!get_ct_or_tuple_from_skb(net, skb, l3num, &ct, &tuple, &zone, &refcounted))
return -ENOENT;
if (ct && nf_ct_is_confirmed(ct)) {
if (refcounted)
nf_ct_put(ct);
return 0;
}
if ((u32)jiffies == list->last_gc) if ((u32)jiffies == list->last_gc)
goto add_new_node; goto add_new_node;
@ -144,10 +194,10 @@ static int __nf_conncount_add(struct net *net,
if (IS_ERR(found)) { if (IS_ERR(found)) {
/* Not found, but might be about to be confirmed */ /* Not found, but might be about to be confirmed */
if (PTR_ERR(found) == -EAGAIN) { if (PTR_ERR(found) == -EAGAIN) {
if (nf_ct_tuple_equal(&conn->tuple, tuple) && if (nf_ct_tuple_equal(&conn->tuple, &tuple) &&
nf_ct_zone_id(&conn->zone, conn->zone.dir) == nf_ct_zone_id(&conn->zone, conn->zone.dir) ==
nf_ct_zone_id(zone, zone->dir)) nf_ct_zone_id(zone, zone->dir))
return 0; /* already exists */ goto out_put; /* already exists */
} else { } else {
collect++; collect++;
} }
@ -156,7 +206,7 @@ static int __nf_conncount_add(struct net *net,
found_ct = nf_ct_tuplehash_to_ctrack(found); found_ct = nf_ct_tuplehash_to_ctrack(found);
if (nf_ct_tuple_equal(&conn->tuple, tuple) && if (nf_ct_tuple_equal(&conn->tuple, &tuple) &&
nf_ct_zone_equal(found_ct, zone, zone->dir)) { nf_ct_zone_equal(found_ct, zone, zone->dir)) {
/* /*
* We should not see tuples twice unless someone hooks * We should not see tuples twice unless someone hooks
@ -165,7 +215,7 @@ static int __nf_conncount_add(struct net *net,
* Attempt to avoid a re-add in this case. * Attempt to avoid a re-add in this case.
*/ */
nf_ct_put(found_ct); nf_ct_put(found_ct);
return 0; goto out_put;
} else if (already_closed(found_ct)) { } else if (already_closed(found_ct)) {
/* /*
* we do not care about connections which are * we do not care about connections which are
@ -188,31 +238,35 @@ add_new_node:
if (conn == NULL) if (conn == NULL)
return -ENOMEM; return -ENOMEM;
conn->tuple = *tuple; conn->tuple = tuple;
conn->zone = *zone; conn->zone = *zone;
conn->cpu = raw_smp_processor_id(); conn->cpu = raw_smp_processor_id();
conn->jiffies32 = (u32)jiffies; conn->jiffies32 = (u32)jiffies;
list_add_tail(&conn->node, &list->head); list_add_tail(&conn->node, &list->head);
list->count++; list->count++;
list->last_gc = (u32)jiffies; list->last_gc = (u32)jiffies;
out_put:
if (refcounted)
nf_ct_put(ct);
return 0; return 0;
} }
int nf_conncount_add(struct net *net, int nf_conncount_add_skb(struct net *net,
struct nf_conncount_list *list, const struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple, u16 l3num,
const struct nf_conntrack_zone *zone) struct nf_conncount_list *list)
{ {
int ret; int ret;
/* check the saved connections */ /* check the saved connections */
spin_lock_bh(&list->list_lock); spin_lock_bh(&list->list_lock);
ret = __nf_conncount_add(net, list, tuple, zone); ret = __nf_conncount_add(net, skb, l3num, list);
spin_unlock_bh(&list->list_lock); spin_unlock_bh(&list->list_lock);
return ret; return ret;
} }
EXPORT_SYMBOL_GPL(nf_conncount_add); EXPORT_SYMBOL_GPL(nf_conncount_add_skb);
void nf_conncount_list_init(struct nf_conncount_list *list) void nf_conncount_list_init(struct nf_conncount_list *list)
{ {
@ -309,19 +363,22 @@ static void schedule_gc_worker(struct nf_conncount_data *data, int tree)
static unsigned int static unsigned int
insert_tree(struct net *net, insert_tree(struct net *net,
const struct sk_buff *skb,
u16 l3num,
struct nf_conncount_data *data, struct nf_conncount_data *data,
struct rb_root *root, struct rb_root *root,
unsigned int hash, unsigned int hash,
const u32 *key, const u32 *key)
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone)
{ {
struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES]; struct nf_conncount_rb *gc_nodes[CONNCOUNT_GC_MAX_NODES];
struct rb_node **rbnode, *parent; const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
struct nf_conncount_rb *rbconn; bool do_gc = true, refcounted = false;
struct nf_conncount_tuple *conn;
unsigned int count = 0, gc_count = 0; unsigned int count = 0, gc_count = 0;
bool do_gc = true; struct rb_node **rbnode, *parent;
struct nf_conntrack_tuple tuple;
struct nf_conncount_tuple *conn;
struct nf_conncount_rb *rbconn;
struct nf_conn *ct = NULL;
spin_lock_bh(&nf_conncount_locks[hash]); spin_lock_bh(&nf_conncount_locks[hash]);
restart: restart:
@ -340,7 +397,7 @@ restart:
} else { } else {
int ret; int ret;
ret = nf_conncount_add(net, &rbconn->list, tuple, zone); ret = nf_conncount_add_skb(net, skb, l3num, &rbconn->list);
if (ret) if (ret)
count = 0; /* hotdrop */ count = 0; /* hotdrop */
else else
@ -364,6 +421,7 @@ restart:
goto restart; goto restart;
} }
if (get_ct_or_tuple_from_skb(net, skb, l3num, &ct, &tuple, &zone, &refcounted)) {
/* expected case: match, insert new node */ /* expected case: match, insert new node */
rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC); rbconn = kmem_cache_alloc(conncount_rb_cachep, GFP_ATOMIC);
if (rbconn == NULL) if (rbconn == NULL)
@ -375,7 +433,7 @@ restart:
goto out_unlock; goto out_unlock;
} }
conn->tuple = *tuple; conn->tuple = tuple;
conn->zone = *zone; conn->zone = *zone;
conn->cpu = raw_smp_processor_id(); conn->cpu = raw_smp_processor_id();
conn->jiffies32 = (u32)jiffies; conn->jiffies32 = (u32)jiffies;
@ -388,6 +446,10 @@ restart:
rb_link_node_rcu(&rbconn->node, parent, rbnode); rb_link_node_rcu(&rbconn->node, parent, rbnode);
rb_insert_color(&rbconn->node, root); rb_insert_color(&rbconn->node, root);
if (refcounted)
nf_ct_put(ct);
}
out_unlock: out_unlock:
spin_unlock_bh(&nf_conncount_locks[hash]); spin_unlock_bh(&nf_conncount_locks[hash]);
return count; return count;
@ -395,10 +457,10 @@ out_unlock:
static unsigned int static unsigned int
count_tree(struct net *net, count_tree(struct net *net,
const struct sk_buff *skb,
u16 l3num,
struct nf_conncount_data *data, struct nf_conncount_data *data,
const u32 *key, const u32 *key)
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone)
{ {
struct rb_root *root; struct rb_root *root;
struct rb_node *parent; struct rb_node *parent;
@ -422,7 +484,7 @@ count_tree(struct net *net,
} else { } else {
int ret; int ret;
if (!tuple) { if (!skb) {
nf_conncount_gc_list(net, &rbconn->list); nf_conncount_gc_list(net, &rbconn->list);
return rbconn->list.count; return rbconn->list.count;
} }
@ -437,7 +499,7 @@ count_tree(struct net *net,
} }
/* same source network -> be counted! */ /* same source network -> be counted! */
ret = __nf_conncount_add(net, &rbconn->list, tuple, zone); ret = __nf_conncount_add(net, skb, l3num, &rbconn->list);
spin_unlock_bh(&rbconn->list.list_lock); spin_unlock_bh(&rbconn->list.list_lock);
if (ret) if (ret)
return 0; /* hotdrop */ return 0; /* hotdrop */
@ -446,10 +508,10 @@ count_tree(struct net *net,
} }
} }
if (!tuple) if (!skb)
return 0; return 0;
return insert_tree(net, data, root, hash, key, tuple, zone); return insert_tree(net, skb, l3num, data, root, hash, key);
} }
static void tree_gc_worker(struct work_struct *work) static void tree_gc_worker(struct work_struct *work)
@ -511,18 +573,19 @@ next:
} }
/* Count and return number of conntrack entries in 'net' with particular 'key'. /* Count and return number of conntrack entries in 'net' with particular 'key'.
* If 'tuple' is not null, insert it into the accounting data structure. * If 'skb' is not null, insert the corresponding tuple into the accounting
* Call with RCU read lock. * data structure. Call with RCU read lock.
*/ */
unsigned int nf_conncount_count(struct net *net, unsigned int nf_conncount_count_skb(struct net *net,
const struct sk_buff *skb,
u16 l3num,
struct nf_conncount_data *data, struct nf_conncount_data *data,
const u32 *key, const u32 *key)
const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_zone *zone)
{ {
return count_tree(net, data, key, tuple, zone); return count_tree(net, skb, l3num, data, key);
} }
EXPORT_SYMBOL_GPL(nf_conncount_count); EXPORT_SYMBOL_GPL(nf_conncount_count_skb);
struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int keylen) struct nf_conncount_data *nf_conncount_init(struct net *net, unsigned int keylen)
{ {

View File

@ -24,26 +24,11 @@ static inline void nft_connlimit_do_eval(struct nft_connlimit *priv,
const struct nft_pktinfo *pkt, const struct nft_pktinfo *pkt,
const struct nft_set_ext *ext) const struct nft_set_ext *ext)
{ {
const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
const struct nf_conntrack_tuple *tuple_ptr;
struct nf_conntrack_tuple tuple;
enum ip_conntrack_info ctinfo;
const struct nf_conn *ct;
unsigned int count; unsigned int count;
int err;
tuple_ptr = &tuple; err = nf_conncount_add_skb(nft_net(pkt), pkt->skb, nft_pf(pkt), priv->list);
if (err) {
ct = nf_ct_get(pkt->skb, &ctinfo);
if (ct != NULL) {
tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
zone = nf_ct_zone(ct);
} else if (!nf_ct_get_tuplepr(pkt->skb, skb_network_offset(pkt->skb),
nft_pf(pkt), nft_net(pkt), &tuple)) {
regs->verdict.code = NF_DROP;
return;
}
if (nf_conncount_add(nft_net(pkt), priv->list, tuple_ptr, zone)) {
regs->verdict.code = NF_DROP; regs->verdict.code = NF_DROP;
return; return;
} }

View File

@ -31,8 +31,6 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
{ {
struct net *net = xt_net(par); struct net *net = xt_net(par);
const struct xt_connlimit_info *info = par->matchinfo; const struct xt_connlimit_info *info = par->matchinfo;
struct nf_conntrack_tuple tuple;
const struct nf_conntrack_tuple *tuple_ptr = &tuple;
const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt; const struct nf_conntrack_zone *zone = &nf_ct_zone_dflt;
enum ip_conntrack_info ctinfo; enum ip_conntrack_info ctinfo;
const struct nf_conn *ct; const struct nf_conn *ct;
@ -40,13 +38,8 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
u32 key[5]; u32 key[5];
ct = nf_ct_get(skb, &ctinfo); ct = nf_ct_get(skb, &ctinfo);
if (ct != NULL) { if (ct)
tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
zone = nf_ct_zone(ct); zone = nf_ct_zone(ct);
} else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
xt_family(par), net, &tuple)) {
goto hotdrop;
}
if (xt_family(par) == NFPROTO_IPV6) { if (xt_family(par) == NFPROTO_IPV6) {
const struct ipv6hdr *iph = ipv6_hdr(skb); const struct ipv6hdr *iph = ipv6_hdr(skb);
@ -69,10 +62,9 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
key[1] = zone->id; key[1] = zone->id;
} }
connections = nf_conncount_count(net, info->data, key, tuple_ptr, connections = nf_conncount_count_skb(net, skb, xt_family(par), info->data, key);
zone);
if (connections == 0) if (connections == 0)
/* kmalloc failed, drop it entirely */ /* kmalloc failed or tuple couldn't be found, drop it entirely */
goto hotdrop; goto hotdrop;
return (connections > info->limit) ^ !!(info->flags & XT_CONNLIMIT_INVERT); return (connections > info->limit) ^ !!(info->flags & XT_CONNLIMIT_INVERT);

View File

@ -928,8 +928,8 @@ static u32 ct_limit_get(const struct ovs_ct_limit_info *info, u16 zone)
} }
static int ovs_ct_check_limit(struct net *net, static int ovs_ct_check_limit(struct net *net,
const struct ovs_conntrack_info *info, const struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple) const struct ovs_conntrack_info *info)
{ {
struct ovs_net *ovs_net = net_generic(net, ovs_net_id); struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
const struct ovs_ct_limit_info *ct_limit_info = ovs_net->ct_limit_info; const struct ovs_ct_limit_info *ct_limit_info = ovs_net->ct_limit_info;
@ -942,8 +942,9 @@ static int ovs_ct_check_limit(struct net *net,
if (per_zone_limit == OVS_CT_LIMIT_UNLIMITED) if (per_zone_limit == OVS_CT_LIMIT_UNLIMITED)
return 0; return 0;
connections = nf_conncount_count(net, ct_limit_info->data, connections = nf_conncount_count_skb(net, skb, info->family,
&conncount_key, tuple, &info->zone); ct_limit_info->data,
&conncount_key);
if (connections > per_zone_limit) if (connections > per_zone_limit)
return -ENOMEM; return -ENOMEM;
@ -972,8 +973,7 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
#if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT) #if IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
if (static_branch_unlikely(&ovs_ct_limit_enabled)) { if (static_branch_unlikely(&ovs_ct_limit_enabled)) {
if (!nf_ct_is_confirmed(ct)) { if (!nf_ct_is_confirmed(ct)) {
err = ovs_ct_check_limit(net, info, err = ovs_ct_check_limit(net, skb, info);
&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
if (err) { if (err) {
net_warn_ratelimited("openvswitch: zone: %u " net_warn_ratelimited("openvswitch: zone: %u "
"exceeds conntrack limit\n", "exceeds conntrack limit\n",
@ -1770,8 +1770,8 @@ static int __ovs_ct_limit_get_zone_limit(struct net *net,
zone_limit.limit = limit; zone_limit.limit = limit;
nf_ct_zone_init(&ct_zone, zone_id, NF_CT_DEFAULT_ZONE_DIR, 0); nf_ct_zone_init(&ct_zone, zone_id, NF_CT_DEFAULT_ZONE_DIR, 0);
zone_limit.count = nf_conncount_count(net, data, &conncount_key, NULL, zone_limit.count = nf_conncount_count_skb(net, NULL, 0, data,
&ct_zone); &conncount_key);
return nla_put_nohdr(reply, sizeof(zone_limit), &zone_limit); return nla_put_nohdr(reply, sizeof(zone_limit), &zone_limit);
} }