vrf: Remove unnecessary RCU protection around dst entries

During initialization of a VRF device, the VRF driver creates two dst
entries (for IPv4 and IPv6). They are attached to locally generated
packets that are transmitted out of the VRF ports (via the
l3mdev_l3_out() hook). Their purpose is to redirect packets towards the
VRF device instead of having the packets egress directly out of the VRF
ports. This is useful, for example, when a queuing discipline is
configured on the VRF device.

In order to avoid a NULL pointer dereference, commit b0e95ccdd7 ("net:
vrf: protect changes to private data with rcu") made the pointers to the
dst entries RCU protected. As far as I can tell, this was needed because
back then the dst entries were released (and the pointers reset to NULL)
before removing the VRF ports.

Later on, commit f630c38ef0 ("vrf: fix bug_on triggered by rx when
destroying a vrf") moved the removal of the VRF ports to the VRF
device's dellink() callback. As a result, the teardown sequence of a
VRF device looks as follows:

1. VRF ports are removed.
2. VRF device is unregistered.
    a. Device is closed.
    b. An RCU grace period passes.
    c. ndo_uninit() is called.
        i. dst entries are released.

Given the above, the Tx path will always see the same fully initialized
dst entries and will never race with the ndo_uninit() callback.

Therefore, there is no need to make the pointers to the dst entries
RCU-protected. Remove the RCU protection as well as the unnecessary
NULL checks in the Tx path.

Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://patch.msgid.link/20260326203233.1128554-4-idosch@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
master
Ido Schimmel 2026-03-26 22:32:33 +02:00 committed by Jakub Kicinski
parent 50504e2579
commit 075196489a
1 changed file with 12 additions and 44 deletions

View File

@ -112,8 +112,8 @@ struct netns_vrf {
};
struct net_vrf {
struct rtable __rcu *rth;
struct rt6_info __rcu *rt6;
struct rtable *rth;
struct rt6_info *rt6;
#if IS_ENABLED(CONFIG_IPV6)
struct fib6_table *fib6_table;
#endif
@ -648,26 +648,13 @@ static struct sk_buff *vrf_ip6_out_redirect(struct net_device *vrf_dev,
struct sk_buff *skb)
{
struct net_vrf *vrf = netdev_priv(vrf_dev);
struct dst_entry *dst = NULL;
struct rt6_info *rt6;
rcu_read_lock();
rt6 = rcu_dereference(vrf->rt6);
if (likely(rt6)) {
dst = &rt6->dst;
dst_hold(dst);
}
rcu_read_unlock();
if (unlikely(!dst)) {
vrf_tx_error(vrf_dev, skb);
return NULL;
}
rt6 = vrf->rt6;
dst_hold(&rt6->dst);
skb_dst_drop(skb);
skb_dst_set(skb, dst);
skb_dst_set(skb, &rt6->dst);
return skb;
}
@ -750,10 +737,7 @@ static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev,
/* holding rtnl */
static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf)
{
struct rt6_info *rt6 = rtnl_dereference(vrf->rt6);
RCU_INIT_POINTER(vrf->rt6, NULL);
synchronize_rcu();
struct rt6_info *rt6 = vrf->rt6;
if (rt6) {
dst_dev_put(&rt6->dst);
@ -784,7 +768,7 @@ static int vrf_rt6_create(struct net_device *dev)
rt6->dst.output = vrf_output6;
rcu_assign_pointer(vrf->rt6, rt6);
vrf->rt6 = rt6;
rc = 0;
out:
@ -870,26 +854,13 @@ static struct sk_buff *vrf_ip_out_redirect(struct net_device *vrf_dev,
struct sk_buff *skb)
{
struct net_vrf *vrf = netdev_priv(vrf_dev);
struct dst_entry *dst = NULL;
struct rtable *rth;
rcu_read_lock();
rth = rcu_dereference(vrf->rth);
if (likely(rth)) {
dst = &rth->dst;
dst_hold(dst);
}
rcu_read_unlock();
if (unlikely(!dst)) {
vrf_tx_error(vrf_dev, skb);
return NULL;
}
rth = vrf->rth;
dst_hold(&rth->dst);
skb_dst_drop(skb);
skb_dst_set(skb, dst);
skb_dst_set(skb, &rth->dst);
return skb;
}
@ -989,10 +960,7 @@ static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev,
/* holding rtnl */
static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf)
{
struct rtable *rth = rtnl_dereference(vrf->rth);
RCU_INIT_POINTER(vrf->rth, NULL);
synchronize_rcu();
struct rtable *rth = vrf->rth;
dst_dev_put(&rth->dst);
dst_release(&rth->dst);
@ -1013,7 +981,7 @@ static int vrf_rtable_create(struct net_device *dev)
rth->dst.output = vrf_output;
rcu_assign_pointer(vrf->rth, rth);
vrf->rth = rth;
return 0;
}