netfilter: nfnetlink_queue: make hash table per queue
Sharing a global hash table among all queues is tempting, but
it can cause crash:
BUG: KASAN: slab-use-after-free in nfqnl_recv_verdict+0x11ac/0x15e0 [nfnetlink_queue]
[..]
nfqnl_recv_verdict+0x11ac/0x15e0 [nfnetlink_queue]
nfnetlink_rcv_msg+0x46a/0x930
kmem_cache_alloc_node_noprof+0x11e/0x450
struct nf_queue_entry is freed via kfree, but parallel cpu can still
encounter such an nf_queue_entry when walking the list.
Alternative fix is to free the nf_queue_entry via kfree_rcu() instead,
but as we have to alloc/free for each skb this will cause more mem
pressure.
Cc: Scott Mitchell <scott.k.mitch1@gmail.com>
Fixes: e19079adcd ("netfilter: nfnetlink_queue: optimize verdict lookup with hash table")
Signed-off-by: Florian Westphal <fw@strlen.de>
master
parent
f8dca15a1b
commit
936206e3f6
|
|
@ -23,7 +23,6 @@ struct nf_queue_entry {
|
|||
struct nf_hook_state state;
|
||||
bool nf_ct_is_unconfirmed;
|
||||
u16 size; /* sizeof(entry) + saved route keys */
|
||||
u16 queue_num;
|
||||
|
||||
/* extra space to store route keys */
|
||||
};
|
||||
|
|
|
|||
|
|
@ -49,8 +49,8 @@
|
|||
#endif
|
||||
|
||||
#define NFQNL_QMAX_DEFAULT 1024
|
||||
#define NFQNL_HASH_MIN 1024
|
||||
#define NFQNL_HASH_MAX 1048576
|
||||
#define NFQNL_HASH_MIN 8
|
||||
#define NFQNL_HASH_MAX 32768
|
||||
|
||||
/* We're using struct nlattr which has 16bit nla_len. Note that nla_len
|
||||
* includes the header length. Thus, the maximum packet length that we
|
||||
|
|
@ -60,29 +60,10 @@
|
|||
*/
|
||||
#define NFQNL_MAX_COPY_RANGE (0xffff - NLA_HDRLEN)
|
||||
|
||||
/* Composite key for packet lookup: (net, queue_num, packet_id) */
|
||||
struct nfqnl_packet_key {
|
||||
possible_net_t net;
|
||||
u32 packet_id;
|
||||
u16 queue_num;
|
||||
} __aligned(sizeof(u32)); /* jhash2 requires 32-bit alignment */
|
||||
|
||||
/* Global rhashtable - one for entire system, all netns */
|
||||
static struct rhashtable nfqnl_packet_map __read_mostly;
|
||||
|
||||
/* Helper to initialize composite key */
|
||||
static inline void nfqnl_init_key(struct nfqnl_packet_key *key,
|
||||
struct net *net, u32 packet_id, u16 queue_num)
|
||||
{
|
||||
memset(key, 0, sizeof(*key));
|
||||
write_pnet(&key->net, net);
|
||||
key->packet_id = packet_id;
|
||||
key->queue_num = queue_num;
|
||||
}
|
||||
|
||||
struct nfqnl_instance {
|
||||
struct hlist_node hlist; /* global list of queues */
|
||||
struct rcu_head rcu;
|
||||
struct rhashtable nfqnl_packet_map;
|
||||
struct rcu_work rwork;
|
||||
|
||||
u32 peer_portid;
|
||||
unsigned int queue_maxlen;
|
||||
|
|
@ -106,6 +87,7 @@ struct nfqnl_instance {
|
|||
|
||||
typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long);
|
||||
|
||||
static struct workqueue_struct *nfq_cleanup_wq __read_mostly;
|
||||
static unsigned int nfnl_queue_net_id __read_mostly;
|
||||
|
||||
#define INSTANCE_BUCKETS 16
|
||||
|
|
@ -124,34 +106,10 @@ static inline u_int8_t instance_hashfn(u_int16_t queue_num)
|
|||
return ((queue_num >> 8) ^ queue_num) % INSTANCE_BUCKETS;
|
||||
}
|
||||
|
||||
/* Extract composite key from nf_queue_entry for hashing */
|
||||
static u32 nfqnl_packet_obj_hashfn(const void *data, u32 len, u32 seed)
|
||||
{
|
||||
const struct nf_queue_entry *entry = data;
|
||||
struct nfqnl_packet_key key;
|
||||
|
||||
nfqnl_init_key(&key, entry->state.net, entry->id, entry->queue_num);
|
||||
|
||||
return jhash2((u32 *)&key, sizeof(key) / sizeof(u32), seed);
|
||||
}
|
||||
|
||||
/* Compare stack-allocated key against entry */
|
||||
static int nfqnl_packet_obj_cmpfn(struct rhashtable_compare_arg *arg,
|
||||
const void *obj)
|
||||
{
|
||||
const struct nfqnl_packet_key *key = arg->key;
|
||||
const struct nf_queue_entry *entry = obj;
|
||||
|
||||
return !net_eq(entry->state.net, read_pnet(&key->net)) ||
|
||||
entry->queue_num != key->queue_num ||
|
||||
entry->id != key->packet_id;
|
||||
}
|
||||
|
||||
static const struct rhashtable_params nfqnl_rhashtable_params = {
|
||||
.head_offset = offsetof(struct nf_queue_entry, hash_node),
|
||||
.key_len = sizeof(struct nfqnl_packet_key),
|
||||
.obj_hashfn = nfqnl_packet_obj_hashfn,
|
||||
.obj_cmpfn = nfqnl_packet_obj_cmpfn,
|
||||
.key_offset = offsetof(struct nf_queue_entry, id),
|
||||
.key_len = sizeof(u32),
|
||||
.automatic_shrinking = true,
|
||||
.min_size = NFQNL_HASH_MIN,
|
||||
.max_size = NFQNL_HASH_MAX,
|
||||
|
|
@ -190,6 +148,10 @@ instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid)
|
|||
spin_lock_init(&inst->lock);
|
||||
INIT_LIST_HEAD(&inst->queue_list);
|
||||
|
||||
err = rhashtable_init(&inst->nfqnl_packet_map, &nfqnl_rhashtable_params);
|
||||
if (err < 0)
|
||||
goto out_free;
|
||||
|
||||
spin_lock(&q->instances_lock);
|
||||
if (instance_lookup(q, queue_num)) {
|
||||
err = -EEXIST;
|
||||
|
|
@ -210,6 +172,8 @@ instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid)
|
|||
|
||||
out_unlock:
|
||||
spin_unlock(&q->instances_lock);
|
||||
rhashtable_destroy(&inst->nfqnl_packet_map);
|
||||
out_free:
|
||||
kfree(inst);
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
|
@ -217,15 +181,18 @@ out_unlock:
|
|||
static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn,
|
||||
unsigned long data);
|
||||
|
||||
static void
|
||||
instance_destroy_rcu(struct rcu_head *head)
|
||||
static void instance_destroy_work(struct work_struct *work)
|
||||
{
|
||||
struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance,
|
||||
rcu);
|
||||
struct nfqnl_instance *inst;
|
||||
|
||||
inst = container_of(to_rcu_work(work), struct nfqnl_instance,
|
||||
rwork);
|
||||
rcu_read_lock();
|
||||
nfqnl_flush(inst, NULL, 0);
|
||||
rcu_read_unlock();
|
||||
|
||||
rhashtable_destroy(&inst->nfqnl_packet_map);
|
||||
|
||||
kfree(inst);
|
||||
module_put(THIS_MODULE);
|
||||
}
|
||||
|
|
@ -234,7 +201,9 @@ static void
|
|||
__instance_destroy(struct nfqnl_instance *inst)
|
||||
{
|
||||
hlist_del_rcu(&inst->hlist);
|
||||
call_rcu(&inst->rcu, instance_destroy_rcu);
|
||||
|
||||
INIT_RCU_WORK(&inst->rwork, instance_destroy_work);
|
||||
queue_rcu_work(nfq_cleanup_wq, &inst->rwork);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
@ -250,9 +219,7 @@ __enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
|
|||
{
|
||||
int err;
|
||||
|
||||
entry->queue_num = queue->queue_num;
|
||||
|
||||
err = rhashtable_insert_fast(&nfqnl_packet_map, &entry->hash_node,
|
||||
err = rhashtable_insert_fast(&queue->nfqnl_packet_map, &entry->hash_node,
|
||||
nfqnl_rhashtable_params);
|
||||
if (unlikely(err))
|
||||
return err;
|
||||
|
|
@ -266,23 +233,19 @@ __enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
|
|||
static void
|
||||
__dequeue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
|
||||
{
|
||||
rhashtable_remove_fast(&nfqnl_packet_map, &entry->hash_node,
|
||||
rhashtable_remove_fast(&queue->nfqnl_packet_map, &entry->hash_node,
|
||||
nfqnl_rhashtable_params);
|
||||
list_del(&entry->list);
|
||||
queue->queue_total--;
|
||||
}
|
||||
|
||||
static struct nf_queue_entry *
|
||||
find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id,
|
||||
struct net *net)
|
||||
find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
|
||||
{
|
||||
struct nfqnl_packet_key key;
|
||||
struct nf_queue_entry *entry;
|
||||
|
||||
nfqnl_init_key(&key, net, id, queue->queue_num);
|
||||
|
||||
spin_lock_bh(&queue->lock);
|
||||
entry = rhashtable_lookup_fast(&nfqnl_packet_map, &key,
|
||||
entry = rhashtable_lookup_fast(&queue->nfqnl_packet_map, &id,
|
||||
nfqnl_rhashtable_params);
|
||||
|
||||
if (entry)
|
||||
|
|
@ -1531,7 +1494,7 @@ static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info,
|
|||
|
||||
verdict = ntohl(vhdr->verdict);
|
||||
|
||||
entry = find_dequeue_entry(queue, ntohl(vhdr->id), info->net);
|
||||
entry = find_dequeue_entry(queue, ntohl(vhdr->id));
|
||||
if (entry == NULL)
|
||||
return -ENOENT;
|
||||
|
||||
|
|
@ -1880,40 +1843,38 @@ static int __init nfnetlink_queue_init(void)
|
|||
{
|
||||
int status;
|
||||
|
||||
status = rhashtable_init(&nfqnl_packet_map, &nfqnl_rhashtable_params);
|
||||
if (status < 0)
|
||||
return status;
|
||||
nfq_cleanup_wq = alloc_ordered_workqueue("nfq_workqueue", 0);
|
||||
if (!nfq_cleanup_wq)
|
||||
return -ENOMEM;
|
||||
|
||||
status = register_pernet_subsys(&nfnl_queue_net_ops);
|
||||
if (status < 0) {
|
||||
pr_err("failed to register pernet ops\n");
|
||||
goto cleanup_rhashtable;
|
||||
}
|
||||
if (status < 0)
|
||||
goto cleanup_pernet_subsys;
|
||||
|
||||
netlink_register_notifier(&nfqnl_rtnl_notifier);
|
||||
status = nfnetlink_subsys_register(&nfqnl_subsys);
|
||||
if (status < 0) {
|
||||
pr_err("failed to create netlink socket\n");
|
||||
goto cleanup_netlink_notifier;
|
||||
}
|
||||
status = netlink_register_notifier(&nfqnl_rtnl_notifier);
|
||||
if (status < 0)
|
||||
goto cleanup_rtnl_notifier;
|
||||
|
||||
status = register_netdevice_notifier(&nfqnl_dev_notifier);
|
||||
if (status < 0) {
|
||||
pr_err("failed to register netdevice notifier\n");
|
||||
goto cleanup_netlink_subsys;
|
||||
}
|
||||
if (status < 0)
|
||||
goto cleanup_dev_notifier;
|
||||
|
||||
status = nfnetlink_subsys_register(&nfqnl_subsys);
|
||||
if (status < 0)
|
||||
goto cleanup_nfqnl_subsys;
|
||||
|
||||
nf_register_queue_handler(&nfqh);
|
||||
|
||||
return status;
|
||||
|
||||
cleanup_netlink_subsys:
|
||||
nfnetlink_subsys_unregister(&nfqnl_subsys);
|
||||
cleanup_netlink_notifier:
|
||||
cleanup_nfqnl_subsys:
|
||||
unregister_netdevice_notifier(&nfqnl_dev_notifier);
|
||||
cleanup_dev_notifier:
|
||||
netlink_unregister_notifier(&nfqnl_rtnl_notifier);
|
||||
cleanup_rtnl_notifier:
|
||||
unregister_pernet_subsys(&nfnl_queue_net_ops);
|
||||
cleanup_rhashtable:
|
||||
rhashtable_destroy(&nfqnl_packet_map);
|
||||
cleanup_pernet_subsys:
|
||||
destroy_workqueue(nfq_cleanup_wq);
|
||||
return status;
|
||||
}
|
||||
|
||||
|
|
@ -1924,9 +1885,7 @@ static void __exit nfnetlink_queue_fini(void)
|
|||
nfnetlink_subsys_unregister(&nfqnl_subsys);
|
||||
netlink_unregister_notifier(&nfqnl_rtnl_notifier);
|
||||
unregister_pernet_subsys(&nfnl_queue_net_ops);
|
||||
|
||||
rhashtable_destroy(&nfqnl_packet_map);
|
||||
|
||||
destroy_workqueue(nfq_cleanup_wq);
|
||||
rcu_barrier(); /* Wait for completion of call_rcu()'s */
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue