Merge branch 'bpf-arena-add-kfunc-for-reserving-arena-memory'
Emil Tsalapatis says: ==================== bpf/arena: Add kfunc for reserving arena memory Add a new kfunc for BPF arenas that reserves a region of the mapping to prevent it from being mapped. These regions serve as guards against out-of-bounds accesses and are useful for debugging arena-related code. From v3 (20250709015712.97099-1-emil@etsalapatis.com) ------------------------------------------------------ - Added Acked-by tags by Yonghong. - Replace hardcoded error numbers in selftests (Yonghong). - Fixed selftest for partially freeing a reserved region (Yonghong). From v2 (20250702003351.197234-1-emil@etsalapatis.com) ------------------------------------------------------ - Removed -EALREADY and replaced with -EINVAL to bring error handling in line with the rest of the BPF code (Alexei). From v1 (20250620031118.245601-1-emil@etsalapatis.com) ------------------------------------------------------ - Removed the additional guard range tree. Adjusted tests accordingly. Reserved regions now behave like allocated regions, and can be unreserved using bpf_arena_free_pages(). They can also be allocated from userspace through minor faults. It is up to the user to prevent erroneous frees and/or use the BPF_F_SEGV_ON_FAULT flag to catch stray userspace accesses (Alexei). - Changed terminology from guard pages to reserved pages (Alexei, Kartikeya). Signed-off-by: Emil Tsalapatis <emil@etsalapatis.com> ==================== Link: https://patch.msgid.link/20250709191312.29840-1-emil@etsalapatis.com Signed-off-by: Alexei Starovoitov <ast@kernel.org>
commit
2b1fd82cba
|
|
@@ -550,6 +550,34 @@ static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt)
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Reserve an arena virtual address range without populating it. This call stops
|
||||
* bpf_arena_alloc_pages from adding pages to this range.
|
||||
*/
|
||||
static int arena_reserve_pages(struct bpf_arena *arena, long uaddr, u32 page_cnt)
|
||||
{
|
||||
long page_cnt_max = (arena->user_vm_end - arena->user_vm_start) >> PAGE_SHIFT;
|
||||
long pgoff;
|
||||
int ret;
|
||||
|
||||
if (uaddr & ~PAGE_MASK)
|
||||
return 0;
|
||||
|
||||
pgoff = compute_pgoff(arena, uaddr);
|
||||
if (pgoff + page_cnt > page_cnt_max)
|
||||
return -EINVAL;
|
||||
|
||||
guard(mutex)(&arena->lock);
|
||||
|
||||
/* Cannot guard already allocated pages. */
|
||||
ret = is_range_tree_set(&arena->rt, pgoff, page_cnt);
|
||||
if (ret)
|
||||
return -EBUSY;
|
||||
|
||||
/* "Allocate" the region to prevent it from being allocated. */
|
||||
return range_tree_clear(&arena->rt, pgoff, page_cnt);
|
||||
}
|
||||
|
||||
__bpf_kfunc_start_defs();
|
||||
|
||||
__bpf_kfunc void *bpf_arena_alloc_pages(void *p__map, void *addr__ign, u32 page_cnt,
|
||||
|
|
@@ -573,11 +601,26 @@ __bpf_kfunc void bpf_arena_free_pages(void *p__map, void *ptr__ign, u32 page_cnt
|
|||
return;
|
||||
arena_free_pages(arena, (long)ptr__ign, page_cnt);
|
||||
}
|
||||
|
||||
/*
 * kfunc entry point: reserve page_cnt pages at ptr__ign inside an arena
 * map so they cannot be handed out by bpf_arena_alloc_pages().
 * Returns 0 on success or a negative errno.
 */
__bpf_kfunc int bpf_arena_reserve_pages(void *p__map, void *ptr__ign, u32 page_cnt)
{
	struct bpf_map *map = p__map;

	/* Only arena maps support page reservation. */
	if (map->map_type != BPF_MAP_TYPE_ARENA)
		return -EINVAL;

	/* Reserving zero pages is a successful no-op. */
	if (!page_cnt)
		return 0;

	return arena_reserve_pages(container_of(map, struct bpf_arena, map),
				   (long)ptr__ign, page_cnt);
}
|
||||
__bpf_kfunc_end_defs();
|
||||
|
||||
/* BTF registration table for the arena page-management kfuncs. */
BTF_KFUNCS_START(arena_kfuncs)
BTF_ID_FLAGS(func, bpf_arena_alloc_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE | KF_ARENA_RET | KF_ARENA_ARG2)
BTF_ID_FLAGS(func, bpf_arena_free_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE | KF_ARENA_ARG2)
BTF_ID_FLAGS(func, bpf_arena_reserve_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE | KF_ARENA_ARG2)
BTF_KFUNCS_END(arena_kfuncs)
|
||||
|
||||
static const struct btf_kfunc_id_set common_kfunc_set = {
|
||||
|
|
|
|||
|
|
@@ -46,8 +46,11 @@
|
|||
|
||||
/* Prototypes for the arena page-management kfuncs exported by the kernel. */
void __arena* bpf_arena_alloc_pages(void *map, void __arena *addr, __u32 page_cnt,
				    int node_id, __u64 flags) __ksym __weak;
/* Reserve page_cnt pages at addr so they cannot be allocated; 0 or -errno. */
int bpf_arena_reserve_pages(void *map, void __arena *addr, __u32 page_cnt) __ksym __weak;
void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt) __ksym __weak;

/* Start of the arena's user VM mapping, usable as an arena pointer. */
#define arena_base(map) ((void __arena *)((struct bpf_arena *)(map))->user_vm_start)
|
||||
|
||||
#else /* when compiled as user space code */
|
||||
|
||||
#define __arena
|
||||
|
|
|
|||
|
|
@@ -3,6 +3,7 @@
|
|||
|
||||
#define BPF_NO_KFUNC_PROTOTYPES
|
||||
#include <vmlinux.h>
|
||||
#include <errno.h>
|
||||
#include <bpf/bpf_helpers.h>
|
||||
#include <bpf/bpf_tracing.h>
|
||||
#include "bpf_misc.h"
|
||||
|
|
@@ -114,6 +115,111 @@ int basic_alloc3(void *ctx)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Allocate the first page, reserve the second, then verify the reserved
 * page can be obtained neither by explicit nor by implicit allocation.
 */
SEC("syscall")
__success __retval(0)
int basic_reserve1(void *ctx)
{
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
	char __arena *page;
	int ret;

	/* Take the first arena page with a regular allocation. */
	page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
	if (!page)
		return 1;

	page += __PAGE_SIZE;

	/* Reserve the second page */
	ret = bpf_arena_reserve_pages(&arena, page, 1);
	if (ret)
		return 2;

	/* Try to explicitly allocate the reserved page. */
	page = bpf_arena_alloc_pages(&arena, page, 1, NUMA_NO_NODE, 0);
	if (page)
		return 3;

	/* Try to implicitly allocate the page (since there's only 2 of them). */
	page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
	if (page)
		return 4;
#endif
	return 0;
}
|
||||
|
||||
/* Reserving a page must make a subsequent explicit allocation of it fail. */
SEC("syscall")
__success __retval(0)
int basic_reserve2(void *ctx)
{
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
	char __arena *base = arena_base(&arena);

	/* Reserve the very first arena page. */
	if (bpf_arena_reserve_pages(&arena, base, 1))
		return 1;

	/* An explicit allocation of the reserved page must yield NULL. */
	base = bpf_arena_alloc_pages(&arena, base, 1, NUMA_NO_NODE, 0);
	if ((u64)base)
		return 2;
#endif
	return 0;
}
|
||||
|
||||
/* Reserve the same page twice, should return -EBUSY. */
|
||||
SEC("syscall")
|
||||
__success __retval(0)
|
||||
int reserve_twice(void *ctx)
|
||||
{
|
||||
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
|
||||
char __arena *page;
|
||||
int ret;
|
||||
|
||||
page = arena_base(&arena);
|
||||
|
||||
ret = bpf_arena_reserve_pages(&arena, page, 1);
|
||||
if (ret)
|
||||
return 1;
|
||||
|
||||
ret = bpf_arena_reserve_pages(&arena, page, 1);
|
||||
if (ret != -EBUSY)
|
||||
return 2;
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Try to reserve past the end of the arena. */
SEC("syscall")
__success __retval(0)
int reserve_invalid_region(void *ctx)
{
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
	char __arena *page;
	int ret;

	/* Try a NULL pointer. */
	ret = bpf_arena_reserve_pages(&arena, NULL, 3);
	if (ret != -EINVAL)
		return 1;

	page = arena_base(&arena);

	/* Three pages overruns the arena — assumes a 2-page arena map, TODO confirm. */
	ret = bpf_arena_reserve_pages(&arena, page, 3);
	if (ret != -EINVAL)
		return 2;

	/* A much larger overrun must fail the same way. */
	ret = bpf_arena_reserve_pages(&arena, page, 4096);
	if (ret != -EINVAL)
		return 3;

	/* page_cnt is u32, so this passes UINT32_MAX pages. */
	ret = bpf_arena_reserve_pages(&arena, page, (1ULL << 32) - 1);
	if (ret != -EINVAL)
		return 4;
#endif
	return 0;
}
|
||||
|
||||
SEC("iter.s/bpf_map")
|
||||
__success __log_level(2)
|
||||
int iter_maps1(struct bpf_iter__bpf_map *ctx)
|
||||
|
|
|
|||
|
|
@@ -67,6 +67,104 @@ int big_alloc1(void *ctx)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* Try to access a reserved page. Behavior should be identical with accessing unallocated pages. */
|
||||
SEC("syscall")
|
||||
__success __retval(0)
|
||||
int access_reserved(void *ctx)
|
||||
{
|
||||
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
|
||||
volatile char __arena *page;
|
||||
char __arena *base;
|
||||
const size_t len = 4;
|
||||
int ret, i;
|
||||
|
||||
/* Get a separate region of the arena. */
|
||||
page = base = arena_base(&arena) + 16384 * PAGE_SIZE;
|
||||
|
||||
ret = bpf_arena_reserve_pages(&arena, base, len);
|
||||
if (ret)
|
||||
return 1;
|
||||
|
||||
/* Try to dirty reserved memory. */
|
||||
for (i = 0; i < len && can_loop; i++)
|
||||
*page = 0x5a;
|
||||
|
||||
for (i = 0; i < len && can_loop; i++) {
|
||||
page = (volatile char __arena *)(base + i * PAGE_SIZE);
|
||||
|
||||
/*
|
||||
* Error out in case either the write went through,
|
||||
* or the address has random garbage.
|
||||
*/
|
||||
if (*page == 0x5a)
|
||||
return 2 + 2 * i;
|
||||
|
||||
if (*page)
|
||||
return 2 + 2 * i + 1;
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Try to allocate a region overlapping with a reservation. */
SEC("syscall")
__success __retval(0)
int request_partially_reserved(void *ctx)
{
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
	volatile char __arena *page;
	char __arena *base;
	int ret;

	/* Add an arbitrary page offset. */
	page = base = arena_base(&arena) + 4096 * __PAGE_SIZE;

	/* Reserve pages 3..6 of the region. */
	ret = bpf_arena_reserve_pages(&arena, base + 3 * __PAGE_SIZE, 4);
	if (ret)
		return 1;

	/* Pages 0..4 overlap the reservation, so the whole allocation must fail. */
	page = bpf_arena_alloc_pages(&arena, base, 5, NUMA_NO_NODE, 0);
	if ((u64)page != 0ULL)
		return 2;
#endif
	return 0;
}
|
||||
|
||||
/* Reserved pages can be released again via bpf_arena_free_pages(). */
SEC("syscall")
__success __retval(0)
int free_reserved(void *ctx)
{
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
	char __arena *addr;
	char __arena *page;
	int ret;

	/* Add an arbitrary page offset. */
	addr = arena_base(&arena) + 32768 * __PAGE_SIZE;

	/* Allocate pages 0 and 1 of the region. */
	page = bpf_arena_alloc_pages(&arena, addr, 2, NUMA_NO_NODE, 0);
	if (!page)
		return 1;

	/* Reserve pages 2 and 3, directly after the allocation. */
	ret = bpf_arena_reserve_pages(&arena, addr + 2 * __PAGE_SIZE, 2);
	if (ret)
		return 2;

	/*
	 * Reserved and allocated pages should be interchangeable for
	 * bpf_arena_free_pages(). Free a reserved and an allocated
	 * page with a single call.
	 */
	bpf_arena_free_pages(&arena, addr + __PAGE_SIZE , 2);

	/* The free call above should have succeeded, so this allocation should too. */
	page = bpf_arena_alloc_pages(&arena, addr + __PAGE_SIZE, 2, NUMA_NO_NODE, 0);
	if (!page)
		return 3;
#endif
	return 0;
}
|
||||
|
||||
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
|
||||
#define PAGE_CNT 100
|
||||
__u8 __arena * __arena page[PAGE_CNT]; /* occupies the first page */
|
||||
|
|
|
|||
Loading…
Reference in New Issue