bpf/arena: Add kfunc for reserving arena memory #9235

Open · wants to merge 2 commits into base: bpf-next_base
43 changes: 43 additions & 0 deletions kernel/bpf/arena.c
@@ -550,6 +550,34 @@ static void arena_free_pages(struct bpf_arena *arena, long uaddr, long page_cnt)
}
}

/*
* Reserve an arena virtual address range without populating it. This call stops
* bpf_arena_alloc_pages from adding pages to this range.
*/
static int arena_reserve_pages(struct bpf_arena *arena, long uaddr, u32 page_cnt)
{
long page_cnt_max = (arena->user_vm_end - arena->user_vm_start) >> PAGE_SHIFT;
long pgoff;
int ret;

	/* Unaligned addresses cannot be reserved; treat this as a no-op. */
	if (uaddr & ~PAGE_MASK)
		return 0;

pgoff = compute_pgoff(arena, uaddr);
if (pgoff + page_cnt > page_cnt_max)
return -EINVAL;

guard(mutex)(&arena->lock);

/* Cannot guard already allocated pages. */
ret = is_range_tree_set(&arena->rt, pgoff, page_cnt);
if (ret)
return -EALREADY;

/* "Allocate" the region to prevent it from being allocated. */
return range_tree_clear(&arena->rt, pgoff, page_cnt);
}

__bpf_kfunc_start_defs();

__bpf_kfunc void *bpf_arena_alloc_pages(void *p__map, void *addr__ign, u32 page_cnt,
@@ -573,11 +601,26 @@ __bpf_kfunc void bpf_arena_free_pages(void *p__map, void *ptr__ign, u32 page_cnt
return;
arena_free_pages(arena, (long)ptr__ign, page_cnt);
}

__bpf_kfunc int bpf_arena_reserve_pages(void *p__map, void *ptr__ign, u32 page_cnt)
{
struct bpf_map *map = p__map;
struct bpf_arena *arena = container_of(map, struct bpf_arena, map);

if (map->map_type != BPF_MAP_TYPE_ARENA)
return -EINVAL;

if (!page_cnt)
return 0;

return arena_reserve_pages(arena, (long)ptr__ign, page_cnt);
}
__bpf_kfunc_end_defs();

BTF_KFUNCS_START(arena_kfuncs)
BTF_ID_FLAGS(func, bpf_arena_alloc_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE | KF_ARENA_RET | KF_ARENA_ARG2)
BTF_ID_FLAGS(func, bpf_arena_free_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE | KF_ARENA_ARG2)
BTF_ID_FLAGS(func, bpf_arena_reserve_pages, KF_TRUSTED_ARGS | KF_SLEEPABLE | KF_ARENA_ARG2)
BTF_KFUNCS_END(arena_kfuncs)

static const struct btf_kfunc_id_set common_kfunc_set = {
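For context on the bookkeeping above: arena->rt tracks the free page ranges, so a reservation is an "allocation without population": the range is removed from the free set, no pages are installed, and bpf_arena_alloc_pages can no longer hand those pages out. A minimal user-space model of that invariant, using a bitmap in place of the kernel's range tree (toy_reserve, range_all_free, and free_map are illustrative names, not kernel APIs):

#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define ARENA_PAGES 8

/* Toy model: bit i set means page i is free, mirroring the range
 * tree's "set means free" convention. Not the kernel implementation. */
static unsigned int free_map = (1u << ARENA_PAGES) - 1;

static bool range_all_free(unsigned int pgoff, unsigned int cnt)
{
	for (unsigned int i = pgoff; i < pgoff + cnt; i++)
		if (!(free_map & (1u << i)))
			return false;
	return true;
}

/* Mirrors the flow of arena_reserve_pages(): reject out-of-bounds
 * ranges, fail if any page is already taken, otherwise remove the
 * range from the free set without populating anything. */
static int toy_reserve(unsigned int pgoff, unsigned int cnt)
{
	if (pgoff + cnt > ARENA_PAGES)
		return -EINVAL;
	if (!range_all_free(pgoff, cnt))
		return -EALREADY;
	for (unsigned int i = pgoff; i < pgoff + cnt; i++)
		free_map &= ~(1u << i);
	return 0;
}

int main(void)
{
	printf("%d\n", toy_reserve(2, 2)); /* 0: pages 2-3 now reserved */
	printf("%d\n", toy_reserve(3, 1)); /* -EALREADY: overlaps the reservation */
	printf("%d\n", toy_reserve(7, 4)); /* -EINVAL: past the end of the arena */
	return 0;
}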
3 changes: 3 additions & 0 deletions tools/testing/selftests/bpf/bpf_arena_common.h
@@ -46,8 +46,11 @@

void __arena* bpf_arena_alloc_pages(void *map, void __arena *addr, __u32 page_cnt,
int node_id, __u64 flags) __ksym __weak;
int bpf_arena_reserve_pages(void *map, void __arena *addr, __u32 page_cnt) __ksym __weak;
void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt) __ksym __weak;

#define arena_base(map) ((void __arena *)((struct bpf_arena *)(map))->user_vm_start)

#else /* when compiled as user space code */

#define __arena
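With the declaration above in place, one use of the new kfunc is to keep guard regions between allocations: reserved pages stay unpopulated, and bpf_arena_alloc_pages refuses to hand them out. A hedged sketch of that pattern (assumes the usual selftest setup with an ARENA map named arena; alloc_with_guard is an illustrative helper, not part of this patch):

/* Allocate data_pages of arena memory followed by one reserved,
 * never-populated guard page. Stray accesses past the end of the
 * data region behave like accesses to unallocated arena memory. */
static char __arena *alloc_with_guard(__u32 data_pages)
{
	char __arena *data;
	int ret;

	data = bpf_arena_alloc_pages(&arena, NULL, data_pages, NUMA_NO_NODE, 0);
	if (!data)
		return NULL;

	/* Reserve the page right after the allocation so no future
	 * allocation can land there. */
	ret = bpf_arena_reserve_pages(&arena, data + data_pages * __PAGE_SIZE, 1);
	if (ret) {
		bpf_arena_free_pages(&arena, data, data_pages);
		return NULL;
	}
	return data;
}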
106 changes: 106 additions & 0 deletions tools/testing/selftests/bpf/progs/verifier_arena.c
@@ -114,6 +114,112 @@ int basic_alloc3(void *ctx)
return 0;
}

SEC("syscall")
__success __retval(0)
int basic_reserve1(void *ctx)
{
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
char __arena *page;
int ret;

page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
if (!page)
return 1;

page += __PAGE_SIZE;

/* Reserve the second page */
ret = bpf_arena_reserve_pages(&arena, page, 1);
if (ret)
return 2;

/* Try to explicitly allocate the reserved page. */
page = bpf_arena_alloc_pages(&arena, page, 1, NUMA_NO_NODE, 0);
if (page)
return 3;

	/* Let the allocator pick a page implicitly; with only two pages in the arena, none remain. */
page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
if (page)
return 4;
#endif
return 0;
}

SEC("syscall")
__success __retval(0)
int basic_reserve2(void *ctx)
{
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
char __arena *page;
int ret;

page = arena_base(&arena);
ret = bpf_arena_reserve_pages(&arena, page, 1);
if (ret)
return 1;

page = bpf_arena_alloc_pages(&arena, page, 1, NUMA_NO_NODE, 0);
if ((u64)page)
return 2;
#endif
return 0;
}

/* Reserving the same page twice should return -EALREADY. */
SEC("syscall")
__success __retval(0)
int reserve_twice(void *ctx)
{
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
char __arena *page;
int ret;

page = arena_base(&arena);

ret = bpf_arena_reserve_pages(&arena, page, 1);
if (ret)
return 1;

	/* The second reservation should fail with -EALREADY (-114). */
ret = bpf_arena_reserve_pages(&arena, page, 1);
if (ret != -114)
return 2;
#endif
return 0;
}

/* Try to reserve past the end of the arena. */
SEC("syscall")
__success __retval(0)
int reserve_invalid_region(void *ctx)
{
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
char __arena *page;
int ret;

	/* A NULL address resolves outside the arena; expect -EINVAL (-22). */
ret = bpf_arena_reserve_pages(&arena, NULL, 3);
if (ret != -22)
return 1;

page = arena_base(&arena);

	/* The arena in this file is only two pages, so three cannot fit. */
	ret = bpf_arena_reserve_pages(&arena, page, 3);
	if (ret != -22)
		return 2;

ret = bpf_arena_reserve_pages(&arena, page, 4096);
if (ret != -22)
return 3;

ret = bpf_arena_reserve_pages(&arena, page, (1ULL << 32) - 1);
if (ret != -22)
return 4;
#endif
return 0;
}

SEC("iter.s/bpf_map")
__success __log_level(2)
int iter_maps1(struct bpf_iter__bpf_map *ctx)
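A note on the numeric constants in the tests above: the BPF programs compare against raw errno values because they do not include an errno header; -22 is -EINVAL and -114 is -EALREADY. Illustrative defines (not part of the patch) that would make the checks self-describing:

/* Errno values spelled out for BPF programs (illustrative only). */
#define ERR_EINVAL	22	/* bad range: out of bounds, including NULL */
#define ERR_EALREADY	114	/* range already reserved or allocated */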
95 changes: 95 additions & 0 deletions tools/testing/selftests/bpf/progs/verifier_arena_large.c
@@ -67,6 +67,101 @@ int big_alloc1(void *ctx)
return 0;
}

/* Try to access a reserved page. Behavior should be identical to accessing unallocated pages. */
SEC("syscall")
__success __retval(0)
int access_reserved(void *ctx)
{
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
volatile char __arena *page;
char __arena *base;
const size_t len = 4;
int ret, i;

/* Get a separate region of the arena. */
page = base = arena_base(&arena) + 16384 * PAGE_SIZE;

ret = bpf_arena_reserve_pages(&arena, base, len);
if (ret)
return 1;

	/* Try to dirty each reserved page; the writes should not go through. */
	for (i = 0; i < len && can_loop; i++) {
		page = (volatile char __arena *)(base + i * PAGE_SIZE);
		*page = 0x5a;
	}

for (i = 0; i < len && can_loop; i++) {
page = (volatile char __arena *)(base + i * PAGE_SIZE);

/*
* Error out in case either the write went through,
* or the address has random garbage.
*/
if (*page == 0x5a)
return 2 + 2 * i;

if (*page)
return 2 + 2 * i + 1;
}
#endif
return 0;
}

/* Try to allocate a region overlapping with a reservation. */
SEC("syscall")
__success __retval(0)
int request_partially_reserved(void *ctx)
{
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
volatile char __arena *page;
char __arena *base;
int ret;

/* Add an arbitrary page offset. */
page = base = arena_base(&arena) + 4096 * __PAGE_SIZE;

ret = bpf_arena_reserve_pages(&arena, base + 3 * __PAGE_SIZE, 4);
if (ret)
return 1;

page = bpf_arena_alloc_pages(&arena, base, 5, NUMA_NO_NODE, 0);
if ((u64)page != 0ULL)
return 2;
#endif
return 0;
}

SEC("syscall")
__success __retval(0)
int free_reserved(void *ctx)
{
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
char __arena *addr;
char __arena *page;
int ret;

/* Add an arbitrary page offset. */
addr = arena_base(&arena) + 32768 * __PAGE_SIZE;

page = bpf_arena_alloc_pages(&arena, addr, 4, NUMA_NO_NODE, 0);
if (!page)
return 1;

ret = bpf_arena_reserve_pages(&arena, addr + 4 * __PAGE_SIZE, 4);
if (ret)
return 2;

	/* Free the allocated region in two halves; the adjacent reservation should not interfere. */
bpf_arena_free_pages(&arena, addr, 2);
bpf_arena_free_pages(&arena, addr + 2 * __PAGE_SIZE, 2);

	/* The frees above released these pages, so this fixed-address allocation should succeed. */
page = bpf_arena_alloc_pages(&arena, addr + 3 * __PAGE_SIZE, 1, NUMA_NO_NODE, 0);
if (!page)
return 3;
#endif
return 0;
}

#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
#define PAGE_CNT 100
__u8 __arena * __arena page[PAGE_CNT]; /* occupies the first page */