net: Add queue-create operation

Add a ynl netdev family operation called queue-create that creates a
new queue on a netdevice:

      name: queue-create
      attribute-set: queue
      flags: [admin-perm]
      do:
        request:
          attributes:
            - ifindex
            - type
            - lease
        reply: &queue-create-op
          attributes:
            - id

This is a generic operation such that it can be extended for various
use cases in future. Right now it is mandatory to specify ifindex,
the queue type which is enforced to rx and a lease. The newly created
queue id is returned to the caller.

A queue from a virtual device can have a lease which refers to another
queue from a physical device. This is useful for memory providers
and AF_XDP operations which take an ifindex and queue id to allow
applications to bind against virtual devices in containers. The lease
couples both queues together and allows to proxy the operations from
a virtual device in a container to the physical device.

In future, the nested lease attribute can be lifted and made optional
for other use-cases such as dynamic queue creation for physical
netdevs. The lack of lease and the specification of the physical
device as an ifindex will imply that we need a real queue to be
allocated. Similarly, the queue type enforcement to rx can then be
lifted as well to support tx.

An early implementation had only driver-specific integration [0], but
in order for other virtual devices to reuse, it makes sense to have
this as a generic API in core net.

For leasing queues, the virtual netdev must have real_num_rx_queue
less than num_rx_queues at the time of calling queue-create. The
queue-type must be rx as only rx queues are supported for leasing
for now. We also enforce that the queue-create ifindex must point
to a virtual device, and that the nested lease attribute's ifindex
must point to a physical device. The nested lease attribute set
contains a netns-id attribute which is currently only intended for
dumping as part of the queue-get operation. Also, it is modeled as
an s32 type similarly as done elsewhere in the stack.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Co-developed-by: David Wei <dw@davidwei.uk>
Signed-off-by: David Wei <dw@davidwei.uk>
Link: https://bpfconf.ebpf.io/bpfconf2025/bpfconf2025_material/lsfmmbpf_2025_netkit_borkmann.pdf [0]
Acked-by: Stanislav Fomichev <sdf@fomichev.me>
Reviewed-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://patch.msgid.link/20260115082603.219152-2-daniel@iogearbox.net
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
master
Daniel Borkmann 2026-01-15 09:25:48 +01:00 committed by Paolo Abeni
parent 4515ec4ad5
commit a5546e18f7
6 changed files with 93 additions and 0 deletions

View File

@ -339,6 +339,15 @@ attribute-sets:
doc: XSK information for this queue, if any.
type: nest
nested-attributes: xsk-info
-
name: lease
doc: |
A queue from a virtual device can have a lease which refers to
another queue from a physical device. This is useful for memory
providers and AF_XDP operations which take an ifindex and queue id
to allow applications to bind against virtual devices in containers.
type: nest
nested-attributes: lease
-
name: qstats
doc: |
@ -537,6 +546,24 @@ attribute-sets:
name: id
-
name: type
-
name: lease
attributes:
-
name: ifindex
doc: The netdev ifindex to lease the queue from.
type: u32
checks:
min: 1
-
name: queue
doc: The netdev queue to lease from.
type: nest
nested-attributes: queue-id
-
name: netns-id
doc: The network namespace id of the netdev.
type: s32
-
name: dmabuf
attributes:
@ -686,6 +713,7 @@ operations:
- dmabuf
- io-uring
- xsk
- lease
dump:
request:
attributes:
@ -797,6 +825,22 @@ operations:
reply:
attributes:
- id
-
name: queue-create
doc: |
Create a new queue for the given netdevice. Whether this operation
is supported depends on the device and the driver.
attribute-set: queue
flags: [admin-perm]
do:
request:
attributes:
- ifindex
- type
- lease
reply: &queue-create-op
attributes:
- id
kernel-family:
headers: ["net/netdev_netlink.h"]

View File

@ -160,6 +160,7 @@ enum {
NETDEV_A_QUEUE_DMABUF,
NETDEV_A_QUEUE_IO_URING,
NETDEV_A_QUEUE_XSK,
NETDEV_A_QUEUE_LEASE,
__NETDEV_A_QUEUE_MAX,
NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1)
@ -202,6 +203,15 @@ enum {
NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1)
};
enum {
NETDEV_A_LEASE_IFINDEX = 1,
NETDEV_A_LEASE_QUEUE,
NETDEV_A_LEASE_NETNS_ID,
__NETDEV_A_LEASE_MAX,
NETDEV_A_LEASE_MAX = (__NETDEV_A_LEASE_MAX - 1)
};
enum {
NETDEV_A_DMABUF_IFINDEX = 1,
NETDEV_A_DMABUF_QUEUES,
@ -228,6 +238,7 @@ enum {
NETDEV_CMD_BIND_RX,
NETDEV_CMD_NAPI_SET,
NETDEV_CMD_BIND_TX,
NETDEV_CMD_QUEUE_CREATE,
__NETDEV_CMD_MAX,
NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1)

View File

@ -28,6 +28,12 @@ static const struct netlink_range_validation netdev_a_napi_defer_hard_irqs_range
};
/* Common nested types */
const struct nla_policy netdev_lease_nl_policy[NETDEV_A_LEASE_NETNS_ID + 1] = {
[NETDEV_A_LEASE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
[NETDEV_A_LEASE_QUEUE] = NLA_POLICY_NESTED(netdev_queue_id_nl_policy),
[NETDEV_A_LEASE_NETNS_ID] = { .type = NLA_S32, },
};
const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1] = {
[NETDEV_A_PAGE_POOL_ID] = NLA_POLICY_FULL_RANGE(NLA_UINT, &netdev_a_page_pool_id_range),
[NETDEV_A_PAGE_POOL_IFINDEX] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_page_pool_ifindex_range),
@ -107,6 +113,13 @@ static const struct nla_policy netdev_bind_tx_nl_policy[NETDEV_A_DMABUF_FD + 1]
[NETDEV_A_DMABUF_FD] = { .type = NLA_U32, },
};
/* NETDEV_CMD_QUEUE_CREATE - do */
static const struct nla_policy netdev_queue_create_nl_policy[NETDEV_A_QUEUE_LEASE + 1] = {
[NETDEV_A_QUEUE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
[NETDEV_A_QUEUE_TYPE] = NLA_POLICY_MAX(NLA_U32, 1),
[NETDEV_A_QUEUE_LEASE] = NLA_POLICY_NESTED(netdev_lease_nl_policy),
};
/* Ops table for netdev */
static const struct genl_split_ops netdev_nl_ops[] = {
{
@ -205,6 +218,13 @@ static const struct genl_split_ops netdev_nl_ops[] = {
.maxattr = NETDEV_A_DMABUF_FD,
.flags = GENL_CMD_CAP_DO,
},
{
.cmd = NETDEV_CMD_QUEUE_CREATE,
.doit = netdev_nl_queue_create_doit,
.policy = netdev_queue_create_nl_policy,
.maxattr = NETDEV_A_QUEUE_LEASE,
.flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO,
},
};
static const struct genl_multicast_group netdev_nl_mcgrps[] = {

View File

@ -14,6 +14,7 @@
#include <net/netdev_netlink.h>
/* Common nested types */
extern const struct nla_policy netdev_lease_nl_policy[NETDEV_A_LEASE_NETNS_ID + 1];
extern const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1];
extern const struct nla_policy netdev_queue_id_nl_policy[NETDEV_A_QUEUE_TYPE + 1];
@ -36,6 +37,7 @@ int netdev_nl_qstats_get_dumpit(struct sk_buff *skb,
int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info);
int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info);
int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info);
int netdev_nl_queue_create_doit(struct sk_buff *skb, struct genl_info *info);
enum {
NETDEV_NLGRP_MGMT,

View File

@ -1120,6 +1120,11 @@ err_genlmsg_free:
return err;
}
int netdev_nl_queue_create_doit(struct sk_buff *skb, struct genl_info *info)
{
return -EOPNOTSUPP;
}
void netdev_nl_sock_priv_init(struct netdev_nl_sock *priv)
{
INIT_LIST_HEAD(&priv->bindings);

View File

@ -160,6 +160,7 @@ enum {
NETDEV_A_QUEUE_DMABUF,
NETDEV_A_QUEUE_IO_URING,
NETDEV_A_QUEUE_XSK,
NETDEV_A_QUEUE_LEASE,
__NETDEV_A_QUEUE_MAX,
NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1)
@ -202,6 +203,15 @@ enum {
NETDEV_A_QSTATS_MAX = (__NETDEV_A_QSTATS_MAX - 1)
};
enum {
NETDEV_A_LEASE_IFINDEX = 1,
NETDEV_A_LEASE_QUEUE,
NETDEV_A_LEASE_NETNS_ID,
__NETDEV_A_LEASE_MAX,
NETDEV_A_LEASE_MAX = (__NETDEV_A_LEASE_MAX - 1)
};
enum {
NETDEV_A_DMABUF_IFINDEX = 1,
NETDEV_A_DMABUF_QUEUES,
@ -228,6 +238,7 @@ enum {
NETDEV_CMD_BIND_RX,
NETDEV_CMD_NAPI_SET,
NETDEV_CMD_BIND_TX,
NETDEV_CMD_QUEUE_CREATE,
__NETDEV_CMD_MAX,
NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1)