net: Proxy netif_mp_{open,close}_rxq for leased queues

When a process in a container wants to setup a memory provider, it will
use the virtual netdev and a leased rxq, and call netif_mp_{open,close}_rxq
to try and restart the queue. At this point, proxy the queue restart on
the real rxq in the physical netdev.

For memory providers (io_uring zero-copy rx and devmem), it causes the
real rxq in the physical netdev to be filled from a memory provider that
has DMA mapped memory from a process within a container.

Signed-off-by: David Wei <dw@davidwei.uk>
Co-developed-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://patch.msgid.link/20260402231031.447597-7-daniel@iogearbox.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
master
David Wei 2026-04-03 01:10:23 +02:00 committed by Jakub Kicinski
parent 1e91c98bc9
commit 5602ad61eb
3 changed files with 95 additions and 20 deletions

View File

@ -12350,10 +12350,8 @@ static void dev_memory_provider_uninstall(struct net_device *dev)
for (i = 0; i < dev->real_num_rx_queues; i++) {
struct netdev_rx_queue *rxq = &dev->_rx[i];
struct pp_memory_provider_params *p = &rxq->mp_params;
if (p->mp_ops && p->mp_ops->uninstall)
p->mp_ops->uninstall(rxq->mp_params.mp_priv, rxq);
__netif_mp_uninstall_rxq(rxq, &rxq->mp_params);
}
}

View File

@ -12,6 +12,7 @@ struct net;
struct netlink_ext_ack;
struct netdev_queue_config;
struct cpumask;
struct pp_memory_provider_params;
/* Random bits of netdevice that don't need to be exposed */
#define FLOW_LIMIT_HISTORY (1 << 7) /* must be ^2 and !overflow buckets */
@ -101,6 +102,12 @@ int netdev_queue_config_validate(struct net_device *dev, int rxq_idx,
bool netif_rxq_has_mp(struct net_device *dev, unsigned int rxq_idx);
bool netif_rxq_is_leased(struct net_device *dev, unsigned int rxq_idx);
void __netif_mp_uninstall_rxq(struct netdev_rx_queue *rxq,
const struct pp_memory_provider_params *p);
void netif_rxq_cleanup_unlease(struct netdev_rx_queue *phys_rxq,
struct netdev_rx_queue *virt_rxq);
/* netdev management, shared between various uAPI entry points */
struct netdev_name_node {
struct hlist_node hlist;

View File

@ -28,6 +28,8 @@ void netdev_rx_queue_unlease(struct netdev_rx_queue *rxq_dst,
netdev_assert_locked(rxq_dst->dev);
netdev_assert_locked(rxq_src->dev);
netif_rxq_cleanup_unlease(rxq_src, rxq_dst);
WRITE_ONCE(rxq_src->lease, NULL);
WRITE_ONCE(rxq_dst->lease, NULL);
@ -200,24 +202,15 @@ int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx)
}
EXPORT_SYMBOL_NS_GPL(netdev_rx_queue_restart, "NETDEV_INTERNAL");
int netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
const struct pp_memory_provider_params *p,
struct netlink_ext_ack *extack)
static int __netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
const struct pp_memory_provider_params *p,
struct netlink_ext_ack *extack)
{
const struct netdev_queue_mgmt_ops *qops = dev->queue_mgmt_ops;
struct netdev_queue_config qcfg[2];
struct netdev_rx_queue *rxq;
int ret;
if (!netdev_need_ops_lock(dev))
return -EOPNOTSUPP;
if (rxq_idx >= dev->real_num_rx_queues) {
NL_SET_ERR_MSG(extack, "rx queue index out of range");
return -ERANGE;
}
rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues);
if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) {
NL_SET_ERR_MSG(extack, "tcp-data-split is disabled");
return -EINVAL;
@ -264,16 +257,48 @@ err_clear_mp:
return ret;
}
void netif_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
const struct pp_memory_provider_params *old_p)
int netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx,
const struct pp_memory_provider_params *p,
struct netlink_ext_ack *extack)
{
struct net_device *orig_dev = dev;
int ret;
if (!netdev_need_ops_lock(dev))
return -EOPNOTSUPP;
if (rxq_idx >= dev->real_num_rx_queues) {
NL_SET_ERR_MSG(extack, "rx queue index out of range");
return -ERANGE;
}
rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues);
if (!netif_rxq_is_leased(dev, rxq_idx))
return __netif_mp_open_rxq(dev, rxq_idx, p, extack);
if (!netif_get_rx_queue_lease_locked(&dev, &rxq_idx)) {
NL_SET_ERR_MSG(extack, "rx queue leased to a virtual netdev");
return -EBUSY;
}
if (!dev->dev.parent) {
NL_SET_ERR_MSG(extack, "rx queue belongs to a virtual netdev");
ret = -EOPNOTSUPP;
goto out;
}
ret = __netif_mp_open_rxq(dev, rxq_idx, p, extack);
out:
netif_put_rx_queue_lease_locked(orig_dev, dev);
return ret;
}
static void __netif_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
const struct pp_memory_provider_params *old_p)
{
struct netdev_queue_config qcfg[2];
struct netdev_rx_queue *rxq;
int err;
if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues))
return;
rxq = __netif_get_rx_queue(dev, ifq_idx);
/* Callers holding a netdev ref may get here after we already
@ -294,3 +319,48 @@ void netif_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
err = netdev_rx_queue_reconfig(dev, ifq_idx, &qcfg[0], &qcfg[1]);
WARN_ON(err && err != -ENETDOWN);
}
void netif_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx,
const struct pp_memory_provider_params *old_p)
{
struct net_device *orig_dev = dev;
if (WARN_ON_ONCE(ifq_idx >= dev->real_num_rx_queues))
return;
if (!netif_rxq_is_leased(dev, ifq_idx))
return __netif_mp_close_rxq(dev, ifq_idx, old_p);
if (WARN_ON_ONCE(!netif_get_rx_queue_lease_locked(&dev, &ifq_idx)))
return;
__netif_mp_close_rxq(dev, ifq_idx, old_p);
netif_put_rx_queue_lease_locked(orig_dev, dev);
}
void __netif_mp_uninstall_rxq(struct netdev_rx_queue *rxq,
const struct pp_memory_provider_params *p)
{
if (p->mp_ops && p->mp_ops->uninstall)
p->mp_ops->uninstall(p->mp_priv, rxq);
}
/* Clean up memory provider state when a queue lease is torn down. If
* a memory provider was installed on the physical queue via the lease,
* close it now. The memory provider is a property of the queue itself,
* and it was _guaranteed_ to be installed on the physical queue via
* the lease redirection. The extra __netif_mp_close_rxq is needed
* since the physical queue can outlive the virtual queue in the lease
* case, so it needs to be reconfigured to clear the memory provider.
*/
void netif_rxq_cleanup_unlease(struct netdev_rx_queue *phys_rxq,
struct netdev_rx_queue *virt_rxq)
{
struct pp_memory_provider_params *p = &phys_rxq->mp_params;
unsigned int ifq_idx = get_netdev_rx_queue_index(phys_rxq);
if (!p->mp_ops)
return;
__netif_mp_uninstall_rxq(virt_rxq, p);
__netif_mp_close_rxq(phys_rxq->dev, ifq_idx, p);
}