Merge branch 'bpf-arena-add-kfunc-for-reserving-arena-memory'

Emil Tsalapatis says:

====================
bpf/arena: Add kfunc for reserving arena memory

Add a new kfunc for BPF arenas that reserves a region of the mapping
to prevent it from being mapped. These regions serve as guards against
out-of-bounds accesses and are useful for debugging arena-related code.
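
For illustration, here is a minimal sketch of the intended use: fencing
off the page behind an allocation so that out-of-bounds accesses land
on a page that can never be handed out. The map name and sizing are
hypothetical; see the selftests below for complete programs.

	char __arena *buf;

	buf = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
	if (!buf)
		return 1;
	/* Guard region: bpf_arena_alloc_pages() can no longer map this page. */
	if (bpf_arena_reserve_pages(&arena, buf + __PAGE_SIZE, 1))
		return 2;
	/* Accesses past 'buf' now hit a page that stays unallocated. */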

From v3 (20250709015712.97099-1-emil@etsalapatis.com)
------------------------------------------------------

- Added Acked-by tags from Yonghong.
- Replaced hardcoded error numbers in selftests (Yonghong).
- Fixed selftest for partially freeing a reserved region (Yonghong).

From v2 (20250702003351.197234-1-emil@etsalapatis.com)
------------------------------------------------------

- Removed -EALREADY and replaced with -EINVAL to bring error handling in
  line with the rest of the BPF code (Alexei).

From v1 (20250620031118.245601-1-emil@etsalapatis.com)
------------------------------------------------------

- Removed the additional guard range tree and adjusted the tests
  accordingly. Reserved regions now behave like allocated regions: they
  can be unreserved using bpf_arena_free_pages() and can be populated
  from userspace through minor faults. It is up to the user to prevent
  erroneous frees and/or to use the BPF_F_SEGV_ON_FAULT flag to catch
  stray userspace accesses (Alexei); see the sketch after this list.
- Changed terminology from guard pages to reserved pages (Alexei,
  Kartikeya).
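
As a sketch of the BPF_F_SEGV_ON_FAULT point above, such an arena map
would be declared with the flag set so that stray accesses trap instead
of silently faulting pages in (the sizing here is illustrative):

	struct {
		__uint(type, BPF_MAP_TYPE_ARENA);
		__uint(map_flags, BPF_F_MMAPABLE | BPF_F_SEGV_ON_FAULT);
		__uint(max_entries, 2); /* arena size in pages */
	} arena SEC(".maps");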

Signed-off-by: Emil Tsalapatis <emil@etsalapatis.com>
====================

Link: https://patch.msgid.link/20250709191312.29840-1-emil@etsalapatis.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Alexei Starovoitov 2025-07-11 10:43:55 -07:00
commit 2b1fd82cba
4 changed files with 250 additions and 0 deletions

kernel/bpf/arena.c

@@ -550,6 +550,34 @@ static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt)
	}
}

/*
 * Reserve an arena virtual address range without populating it. This call stops
 * bpf_arena_alloc_pages from adding pages to this range.
 */
static int arena_reserve_pages(struct bpf_arena *arena, long uaddr, u32 page_cnt)
{
	long page_cnt_max = (arena->user_vm_end - arena->user_vm_start) >> PAGE_SHIFT;
	long pgoff;
	int ret;

	if (uaddr & ~PAGE_MASK)
		return 0;

	pgoff = compute_pgoff(arena, uaddr);
	if (pgoff + page_cnt > page_cnt_max)
		return -EINVAL;

	guard(mutex)(&arena->lock);

	/* Cannot guard already allocated pages. */
	ret = is_range_tree_set(&arena->rt, pgoff, page_cnt);
	if (ret)
		return -EBUSY;

	/* "Allocate" the region to prevent it from being allocated. */
	return range_tree_clear(&arena->rt, pgoff, page_cnt);
}

__bpf_kfunc_start_defs();

__bpf_kfunc void *bpf_arena_alloc_pages(void *p__map, void *addr__ign, u32 page_cnt,
@@ -573,11 +601,26 @@ __bpf_kfunc void bpf_arena_free_pages(void *p__map, void *ptr__ign, u32 page_cnt)
		return;
	arena_free_pages(arena, (long)ptr__ign, page_cnt);
}

__bpf_kfunc int bpf_arena_reserve_pages(void *p__map, void *ptr__ign, u32 page_cnt)
{
	struct bpf_map *map = p__map;
	struct bpf_arena *arena = container_of(map, struct bpf_arena, map);

	if (map->map_type != BPF_MAP_TYPE_ARENA)
		return -EINVAL;

	if (!page_cnt)
		return 0;

	return arena_reserve_pages(arena, (long)ptr__ign, page_cnt);
}

__bpf_kfunc_end_defs();

BTF_KFUNCS_START(arena_kfuncs)
BTF_ID_FLAGS(func, bpf_arena_alloc_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE | KF_ARENA_RET | KF_ARENA_ARG2)
BTF_ID_FLAGS(func, bpf_arena_free_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE | KF_ARENA_ARG2)
BTF_ID_FLAGS(func, bpf_arena_reserve_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE | KF_ARENA_ARG2)
BTF_KFUNCS_END(arena_kfuncs)

static const struct btf_kfunc_id_set common_kfunc_set = {

tools/testing/selftests/bpf/bpf_arena_common.h

@@ -46,8 +46,11 @@
void __arena* bpf_arena_alloc_pages(void *map, void __arena *addr, __u32 page_cnt,
				    int node_id, __u64 flags) __ksym __weak;
int bpf_arena_reserve_pages(void *map, void __arena *addr, __u32 page_cnt) __ksym __weak;
void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt) __ksym __weak;

#define arena_base(map) ((void __arena *)((struct bpf_arena *)(map))->user_vm_start)

#else /* when compiled as user space code */

#define __arena

tools/testing/selftests/bpf/progs/verifier_arena.c

@@ -3,6 +3,7 @@
#define BPF_NO_KFUNC_PROTOTYPES
#include <vmlinux.h>
#include <errno.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
@@ -114,6 +115,111 @@ int basic_alloc3(void *ctx)
	return 0;
}
SEC("syscall")
__success __retval(0)
int basic_reserve1(void *ctx)
{
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
char __arena *page;
int ret;
page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
if (!page)
return 1;
page += __PAGE_SIZE;
/* Reserve the second page */
ret = bpf_arena_reserve_pages(&arena, page, 1);
if (ret)
return 2;
/* Try to explicitly allocate the reserved page. */
page = bpf_arena_alloc_pages(&arena, page, 1, NUMA_NO_NODE, 0);
if (page)
return 3;
/* Try to implicitly allocate the page (since there's only 2 of them). */
page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
if (page)
return 4;
#endif
return 0;
}
SEC("syscall")
__success __retval(0)
int basic_reserve2(void *ctx)
{
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
char __arena *page;
int ret;
page = arena_base(&arena);
ret = bpf_arena_reserve_pages(&arena, page, 1);
if (ret)
return 1;
page = bpf_arena_alloc_pages(&arena, page, 1, NUMA_NO_NODE, 0);
if ((u64)page)
return 2;
#endif
return 0;
}

/* Reserve the same page twice; the second call should return -EBUSY. */
SEC("syscall")
__success __retval(0)
int reserve_twice(void *ctx)
{
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
	char __arena *page;
	int ret;

	page = arena_base(&arena);

	ret = bpf_arena_reserve_pages(&arena, page, 1);
	if (ret)
		return 1;

	ret = bpf_arena_reserve_pages(&arena, page, 1);
	if (ret != -EBUSY)
		return 2;
#endif
	return 0;
}

/* Try to reserve past the end of the arena. */
SEC("syscall")
__success __retval(0)
int reserve_invalid_region(void *ctx)
{
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
	char __arena *page;
	int ret;

	/* Try a NULL pointer. */
	ret = bpf_arena_reserve_pages(&arena, NULL, 3);
	if (ret != -EINVAL)
		return 1;

	page = arena_base(&arena);

	ret = bpf_arena_reserve_pages(&arena, page, 3);
	if (ret != -EINVAL)
		return 2;

	ret = bpf_arena_reserve_pages(&arena, page, 4096);
	if (ret != -EINVAL)
		return 3;

	ret = bpf_arena_reserve_pages(&arena, page, (1ULL << 32) - 1);
	if (ret != -EINVAL)
		return 4;
#endif
	return 0;
}

SEC("iter.s/bpf_map")
__success __log_level(2)
int iter_maps1(struct bpf_iter__bpf_map *ctx)

tools/testing/selftests/bpf/progs/verifier_arena_large.c

@@ -67,6 +67,104 @@ int big_alloc1(void *ctx)
	return 0;
}

/* Try to access a reserved page. Behavior should be identical to accessing unallocated pages. */
SEC("syscall")
__success __retval(0)
int access_reserved(void *ctx)
{
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
	volatile char __arena *page;
	char __arena *base;
	const size_t len = 4;
	int ret, i;

	/* Get a separate region of the arena. */
	page = base = arena_base(&arena) + 16384 * PAGE_SIZE;

	ret = bpf_arena_reserve_pages(&arena, base, len);
	if (ret)
		return 1;

	/* Try to dirty reserved memory. */
	for (i = 0; i < len && can_loop; i++)
		*page = 0x5a;

	for (i = 0; i < len && can_loop; i++) {
		page = (volatile char __arena *)(base + i * PAGE_SIZE);

		/*
		 * Error out in case either the write went through,
		 * or the address has random garbage.
		 */
		if (*page == 0x5a)
			return 2 + 2 * i;

		if (*page)
			return 2 + 2 * i + 1;
	}
#endif
	return 0;
}

/* Try to allocate a region overlapping with a reservation. */
SEC("syscall")
__success __retval(0)
int request_partially_reserved(void *ctx)
{
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
	volatile char __arena *page;
	char __arena *base;
	int ret;

	/* Add an arbitrary page offset. */
	page = base = arena_base(&arena) + 4096 * __PAGE_SIZE;

	ret = bpf_arena_reserve_pages(&arena, base + 3 * __PAGE_SIZE, 4);
	if (ret)
		return 1;

	page = bpf_arena_alloc_pages(&arena, base, 5, NUMA_NO_NODE, 0);
	if ((u64)page != 0ULL)
		return 2;
#endif
	return 0;
}
SEC("syscall")
__success __retval(0)
int free_reserved(void *ctx)
{
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
char __arena *addr;
char __arena *page;
int ret;
/* Add an arbitrary page offset. */
addr = arena_base(&arena) + 32768 * __PAGE_SIZE;
page = bpf_arena_alloc_pages(&arena, addr, 2, NUMA_NO_NODE, 0);
if (!page)
return 1;
ret = bpf_arena_reserve_pages(&arena, addr + 2 * __PAGE_SIZE, 2);
if (ret)
return 2;
/*
* Reserved and allocated pages should be interchangeable for
* bpf_arena_free_pages(). Free a reserved and an allocated
* page with a single call.
*/
bpf_arena_free_pages(&arena, addr + __PAGE_SIZE , 2);
/* The free call above should have succeeded, so this allocation should too. */
page = bpf_arena_alloc_pages(&arena, addr + __PAGE_SIZE, 2, NUMA_NO_NODE, 0);
if (!page)
return 3;
#endif
return 0;
}
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
#define PAGE_CNT 100
__u8 __arena * __arena page[PAGE_CNT]; /* occupies the first page */